@inproceedings{attia-etal-2012-automatic,
title = "Automatic Extraction and Evaluation of {A}rabic {LFG} Resources",
author = "Attia, Mohammed and
Shaalan, Khaled and
Tounsi, Lamia and
van Genabith, Josef",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Do{\u{g}}an, Mehmet U{\u{g}}ur and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12)",
month = may,
year = "2012",
address = "Istanbul, Turkey",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2012/pdf/609_Paper.pdf",
pages = "1947--1954",
abstract = "This paper presents the results of an approach to automatically acquire large-scale, probabilistic Lexical-Functional Grammar (LFG) resources for Arabic from the Penn Arabic Treebank (ATB). Our starting point is the earlier, work of (Tounsi et al., 2009) on automatic LFG f(eature)-structure annotation for Arabic using the ATB. They exploit tree configuration, POS categories, functional tags, local heads and trace information to annotate nodes with LFG feature-structure equations. We utilize this annotation to automatically acquire grammatical function (dependency) based subcategorization frames and paths linking long-distance dependencies (LDDs). Many state-of-the-art treebank-based probabilistic parsing approaches are scalable and robust but often also shallow: they do not capture LDDs and represent only local information. Subcategorization frames and LDD paths can be used to recover LDDs from such parser output to capture deep linguistic information. Automatic acquisition of language resources from existing treebanks saves time and effort involved in creating such resources by hand. Moreover, data-driven automatic acquisition naturally associates probabilistic information with subcategorization frames and LDD paths. Finally, based on the statistical distribution of LDD path types, we propose empirical bounds on traditional regular expression based functional uncertainty equations used to handle LDDs in LFG.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="attia-etal-2012-automatic">
<titleInfo>
<title>Automatic Extraction and Evaluation of Arabic LFG Resources</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mohammed</namePart>
<namePart type="family">Attia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khaled</namePart>
<namePart type="family">Shaalan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lamia</namePart>
<namePart type="family">Tounsi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Josef</namePart>
<namePart type="family">van Genabith</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2012-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mehmet</namePart>
<namePart type="given">Uğur</namePart>
<namePart type="family">Doğan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Istanbul, Turkey</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents the results of an approach to automatically acquire large-scale, probabilistic Lexical-Functional Grammar (LFG) resources for Arabic from the Penn Arabic Treebank (ATB). Our starting point is the earlier, work of (Tounsi et al., 2009) on automatic LFG f(eature)-structure annotation for Arabic using the ATB. They exploit tree configuration, POS categories, functional tags, local heads and trace information to annotate nodes with LFG feature-structure equations. We utilize this annotation to automatically acquire grammatical function (dependency) based subcategorization frames and paths linking long-distance dependencies (LDDs). Many state-of-the-art treebank-based probabilistic parsing approaches are scalable and robust but often also shallow: they do not capture LDDs and represent only local information. Subcategorization frames and LDD paths can be used to recover LDDs from such parser output to capture deep linguistic information. Automatic acquisition of language resources from existing treebanks saves time and effort involved in creating such resources by hand. Moreover, data-driven automatic acquisition naturally associates probabilistic information with subcategorization frames and LDD paths. Finally, based on the statistical distribution of LDD path types, we propose empirical bounds on traditional regular expression based functional uncertainty equations used to handle LDDs in LFG.</abstract>
<identifier type="citekey">attia-etal-2012-automatic</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/609_Paper.pdf</url>
</location>
<part>
<date>2012-05</date>
<extent unit="page">
<start>1947</start>
<end>1954</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Automatic Extraction and Evaluation of Arabic LFG Resources
%A Attia, Mohammed
%A Shaalan, Khaled
%A Tounsi, Lamia
%A van Genabith, Josef
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Doğan, Mehmet Uğur
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)
%D 2012
%8 May
%I European Language Resources Association (ELRA)
%C Istanbul, Turkey
%F attia-etal-2012-automatic
%X This paper presents the results of an approach to automatically acquire large-scale, probabilistic Lexical-Functional Grammar (LFG) resources for Arabic from the Penn Arabic Treebank (ATB). Our starting point is the earlier, work of (Tounsi et al., 2009) on automatic LFG f(eature)-structure annotation for Arabic using the ATB. They exploit tree configuration, POS categories, functional tags, local heads and trace information to annotate nodes with LFG feature-structure equations. We utilize this annotation to automatically acquire grammatical function (dependency) based subcategorization frames and paths linking long-distance dependencies (LDDs). Many state-of-the-art treebank-based probabilistic parsing approaches are scalable and robust but often also shallow: they do not capture LDDs and represent only local information. Subcategorization frames and LDD paths can be used to recover LDDs from such parser output to capture deep linguistic information. Automatic acquisition of language resources from existing treebanks saves time and effort involved in creating such resources by hand. Moreover, data-driven automatic acquisition naturally associates probabilistic information with subcategorization frames and LDD paths. Finally, based on the statistical distribution of LDD path types, we propose empirical bounds on traditional regular expression based functional uncertainty equations used to handle LDDs in LFG.
%U http://www.lrec-conf.org/proceedings/lrec2012/pdf/609_Paper.pdf
%P 1947-1954
Markdown (Informal)
[Automatic Extraction and Evaluation of Arabic LFG Resources](http://www.lrec-conf.org/proceedings/lrec2012/pdf/609_Paper.pdf) (Attia et al., LREC 2012)
ACL
- Mohammed Attia, Khaled Shaalan, Lamia Tounsi, and Josef van Genabith. 2012. Automatic Extraction and Evaluation of Arabic LFG Resources. In Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12), pages 1947–1954, Istanbul, Turkey. European Language Resources Association (ELRA).