% LREC 2012 conference paper. Braces in title/booktitle keep English, CoNLL and LREC from being recased by styles.
@inproceedings{kolachina-kolachina-2012-parsing,
    title     = {Parsing Any Domain {English} text to {CoNLL} dependencies},
    author    = {Kolachina, Sudheer and
                 Kolachina, Prasanth},
    editor    = {Calzolari, Nicoletta and
                 Choukri, Khalid and
                 Declerck, Thierry and
                 Do{\u{g}}an, Mehmet U{\u{g}}ur and
                 Maegaard, Bente and
                 Mariani, Joseph and
                 Moreno, Asuncion and
                 Odijk, Jan and
                 Piperidis, Stelios},
    booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12)},
    month     = may,
    year      = {2012},
    address   = {Istanbul, Turkey},
    publisher = {European Language Resources Association (ELRA)},
    url       = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/1097_Paper.pdf},
    pages     = {3873--3880},
    abstract  = {It is well known that accuracies of statistical parsers trained over Penn Treebank on test sets drawn from the same corpus tend to be overestimates of their actual parsing performance. This gives rise to the need for evaluation of parsing performance on corpora from different domains. Evaluating multiple parsers on test sets from different domains can give a detailed picture about the relative strengths/weaknesses of different parsing approaches. Such information is also necessary to guide choice of parser in applications such as machine translation where text from multiple domains needs to be handled. In this paper, we report a benchmarking study of different state-of-art parsers for English, both constituency and dependency. The constituency parser output is converted into CoNLL-style dependency trees so that parsing performance can be compared across formalisms. Specifically, we train rerankers for Berkeley and Stanford parsers to study the usefulness of reranking for handling texts from different domains. The results of our experiments lead to interesting insights about the out-of-domain performance of different English parsers.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<!-- Single MODS record; the ID attribute below matches the citekey identifier further down. -->
<mods ID="kolachina-kolachina-2012-parsing">
<titleInfo>
<title>Parsing Any Domain English text to CoNLL dependencies</title>
</titleInfo>
<!-- Authors of the paper (role: author). -->
<name type="personal">
<namePart type="given">Sudheer</namePart>
<namePart type="family">Kolachina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Prasanth</namePart>
<namePart type="family">Kolachina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2012-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Host item: the proceedings volume, with its editors, publisher and place. -->
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<!-- This editor has two given-name parts (Mehmet, Uğur). -->
<name type="personal">
<namePart type="given">Mehmet</namePart>
<namePart type="given">Uğur</namePart>
<namePart type="family">Doğan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Istanbul, Turkey</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>It is well known that accuracies of statistical parsers trained over Penn Treebank on test sets drawn from the same corpus tend to be overestimates of their actual parsing performance. This gives rise to the need for evaluation of parsing performance on corpora from different domains. Evaluating multiple parsers on test sets from different domains can give a detailed picture about the relative strengths/weaknesses of different parsing approaches. Such information is also necessary to guide choice of parser in applications such as machine translation where text from multiple domains needs to be handled. In this paper, we report a benchmarking study of different state-of-art parsers for English, both constituency and dependency. The constituency parser output is converted into CoNLL-style dependency trees so that parsing performance can be compared across formalisms. Specifically, we train rerankers for Berkeley and Stanford parsers to study the usefulness of reranking for handling texts from different domains. The results of our experiments lead to interesting insights about the out-of-domain performance of different English parsers.</abstract>
<identifier type="citekey">kolachina-kolachina-2012-parsing</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/1097_Paper.pdf</url>
</location>
<!-- Part details: issue date and the paper's first and last page. -->
<part>
<date>2012-05</date>
<extent unit="page">
<start>3873</start>
<end>3880</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Parsing Any Domain English text to CoNLL dependencies
%A Kolachina, Sudheer
%A Kolachina, Prasanth
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Doğan, Mehmet Uğur
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)
%D 2012
%8 May
%I European Language Resources Association (ELRA)
%C Istanbul, Turkey
%F kolachina-kolachina-2012-parsing
%X It is well known that accuracies of statistical parsers trained over Penn Treebank on test sets drawn from the same corpus tend to be overestimates of their actual parsing performance. This gives rise to the need for evaluation of parsing performance on corpora from different domains. Evaluating multiple parsers on test sets from different domains can give a detailed picture about the relative strengths/weaknesses of different parsing approaches. Such information is also necessary to guide choice of parser in applications such as machine translation where text from multiple domains needs to be handled. In this paper, we report a benchmarking study of different state-of-art parsers for English, both constituency and dependency. The constituency parser output is converted into CoNLL-style dependency trees so that parsing performance can be compared across formalisms. Specifically, we train rerankers for Berkeley and Stanford parsers to study the usefulness of reranking for handling texts from different domains. The results of our experiments lead to interesting insights about the out-of-domain performance of different English parsers.
%U http://www.lrec-conf.org/proceedings/lrec2012/pdf/1097_Paper.pdf
%P 3873-3880
Markdown (Informal)
[Parsing Any Domain English text to CoNLL dependencies](http://www.lrec-conf.org/proceedings/lrec2012/pdf/1097_Paper.pdf) (Kolachina & Kolachina, LREC 2012)
ACL
- Sudheer Kolachina and Prasanth Kolachina. 2012. Parsing Any Domain English text to CoNLL dependencies. In Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12), pages 3873–3880, Istanbul, Turkey. European Language Resources Association (ELRA).