@inproceedings{seraji-etal-2014-persian,
title = "A {P}ersian Treebank with {S}tanford Typed Dependencies",
author = "Seraji, Mojgan and
Jahani, Carina and
Megyesi, Be{\'a}ta and
Nivre, Joakim",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Loftsson, Hrafn and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)",
month = may,
year = "2014",
address = "Reykjavik, Iceland",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2014/pdf/378_Paper.pdf",
pages = "796--801",
abstract = "We present the Uppsala Persian Dependency Treebank (UPDT) with a syntactic annotation scheme based on Stanford Typed Dependencies. The treebank consists of 6,000 sentences and 151,671 tokens with an average sentence length of 25 words. The data is from different genres, including newspaper articles and fiction, as well as technical descriptions and texts about culture and art, taken from the open source Uppsala Persian Corpus (UPC). The syntactic annotation scheme is extended for Persian to include all syntactic relations that could not be covered by the primary scheme developed for English. In addition, we present open source tools for automatic analysis of Persian containing a text normalizer, a sentence segmenter and tokenizer, a part-of-speech tagger, and a parser. The treebank and the parser have been developed simultaneously in a bootstrapping procedure. The result of a parsing experiment shows an overall labeled attachment score of 82.05{\%} and an unlabeled attachment score of 85.29{\%}. The treebank is freely available as an open source resource.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="seraji-etal-2014-persian">
<titleInfo>
<title>A Persian Treebank with Stanford Typed Dependencies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mojgan</namePart>
<namePart type="family">Seraji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carina</namePart>
<namePart type="family">Jahani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Beáta</namePart>
<namePart type="family">Megyesi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joakim</namePart>
<namePart type="family">Nivre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2014-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hrafn</namePart>
<namePart type="family">Loftsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Reykjavik, Iceland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present the Uppsala Persian Dependency Treebank (UPDT) with a syntactic annotation scheme based on Stanford Typed Dependencies. The treebank consists of 6,000 sentences and 151,671 tokens with an average sentence length of 25 words. The data is from different genres, including newspaper articles and fiction, as well as technical descriptions and texts about culture and art, taken from the open source Uppsala Persian Corpus (UPC). The syntactic annotation scheme is extended for Persian to include all syntactic relations that could not be covered by the primary scheme developed for English. In addition, we present open source tools for automatic analysis of Persian containing a text normalizer, a sentence segmenter and tokenizer, a part-of-speech tagger, and a parser. The treebank and the parser have been developed simultaneously in a bootstrapping procedure. The result of a parsing experiment shows an overall labeled attachment score of 82.05% and an unlabeled attachment score of 85.29%. The treebank is freely available as an open source resource.</abstract>
<identifier type="citekey">seraji-etal-2014-persian</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/378_Paper.pdf</url>
</location>
<part>
<date>2014-05</date>
<extent unit="page">
<start>796</start>
<end>801</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Persian Treebank with Stanford Typed Dependencies
%A Seraji, Mojgan
%A Jahani, Carina
%A Megyesi, Beáta
%A Nivre, Joakim
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Loftsson, Hrafn
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)
%D 2014
%8 May
%I European Language Resources Association (ELRA)
%C Reykjavik, Iceland
%F seraji-etal-2014-persian
%X We present the Uppsala Persian Dependency Treebank (UPDT) with a syntactic annotation scheme based on Stanford Typed Dependencies. The treebank consists of 6,000 sentences and 151,671 tokens with an average sentence length of 25 words. The data is from different genres, including newspaper articles and fiction, as well as technical descriptions and texts about culture and art, taken from the open source Uppsala Persian Corpus (UPC). The syntactic annotation scheme is extended for Persian to include all syntactic relations that could not be covered by the primary scheme developed for English. In addition, we present open source tools for automatic analysis of Persian containing a text normalizer, a sentence segmenter and tokenizer, a part-of-speech tagger, and a parser. The treebank and the parser have been developed simultaneously in a bootstrapping procedure. The result of a parsing experiment shows an overall labeled attachment score of 82.05% and an unlabeled attachment score of 85.29%. The treebank is freely available as an open source resource.
%U http://www.lrec-conf.org/proceedings/lrec2014/pdf/378_Paper.pdf
%P 796-801
Markdown (Informal)
[A Persian Treebank with Stanford Typed Dependencies](http://www.lrec-conf.org/proceedings/lrec2014/pdf/378_Paper.pdf) (Seraji et al., LREC 2014)
ACL
- Mojgan Seraji, Carina Jahani, Beáta Megyesi, and Joakim Nivre. 2014. A Persian Treebank with Stanford Typed Dependencies. In Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14), pages 796–801, Reykjavik, Iceland. European Language Resources Association (ELRA).