@inproceedings{proisl-uhrig-2012-efficient,
title = "Efficient Dependency Graph Matching with the {IMS} Open Corpus Workbench",
author = "Proisl, Thomas and
Uhrig, Peter",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Do{\u{g}}an, Mehmet U{\u{g}}ur and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12)",
month = may,
year = "2012",
address = "Istanbul, Turkey",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2012/pdf/709_Paper.pdf",
pages = "2750--2756",
abstract = "State-of-the-art dependency representations such as the Stanford Typed Dependencies may represent the grammatical relations in a sentence as directed, possibly cyclic graphs. Querying a syntactically annotated corpus for grammatical structures that are represented as graphs requires graph matching, which is a non-trivial task. In this paper, we present an algorithm for graph matching that is tailored to the properties of large, syntactically annotated corpora. The implementation of the algorithm is built on top of the popular IMS Open Corpus Workbench, allowing corpus linguists to re-use existing infrastructure. An evaluation of the resulting software, CWB-treebank, shows that its performance in real world applications, such as a web query interface, compares favourably to implementations that rely on a relational database or a dedicated graph database while at the same time offering a greater expressive power for queries. An intuitive graphical interface for building the query graphs is available via the Treebank.info project.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="proisl-uhrig-2012-efficient">
<titleInfo>
<title>Efficient Dependency Graph Matching with the IMS Open Corpus Workbench</title>
</titleInfo>
<name type="personal">
<namePart type="given">Thomas</namePart>
<namePart type="family">Proisl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Uhrig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2012-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mehmet</namePart>
<namePart type="given">Uğur</namePart>
<namePart type="family">Doğan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Istanbul, Turkey</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>State-of-the-art dependency representations such as the Stanford Typed Dependencies may represent the grammatical relations in a sentence as directed, possibly cyclic graphs. Querying a syntactically annotated corpus for grammatical structures that are represented as graphs requires graph matching, which is a non-trivial task. In this paper, we present an algorithm for graph matching that is tailored to the properties of large, syntactically annotated corpora. The implementation of the algorithm is built on top of the popular IMS Open Corpus Workbench, allowing corpus linguists to re-use existing infrastructure. An evaluation of the resulting software, CWB-treebank, shows that its performance in real world applications, such as a web query interface, compares favourably to implementations that rely on a relational database or a dedicated graph database while at the same time offering a greater expressive power for queries. An intuitive graphical interface for building the query graphs is available via the Treebank.info project.</abstract>
<identifier type="citekey">proisl-uhrig-2012-efficient</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/709_Paper.pdf</url>
</location>
<part>
<date>2012-05</date>
<extent unit="page">
<start>2750</start>
<end>2756</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Efficient Dependency Graph Matching with the IMS Open Corpus Workbench
%A Proisl, Thomas
%A Uhrig, Peter
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Doğan, Mehmet Uğur
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)
%D 2012
%8 May
%I European Language Resources Association (ELRA)
%C Istanbul, Turkey
%F proisl-uhrig-2012-efficient
%X State-of-the-art dependency representations such as the Stanford Typed Dependencies may represent the grammatical relations in a sentence as directed, possibly cyclic graphs. Querying a syntactically annotated corpus for grammatical structures that are represented as graphs requires graph matching, which is a non-trivial task. In this paper, we present an algorithm for graph matching that is tailored to the properties of large, syntactically annotated corpora. The implementation of the algorithm is built on top of the popular IMS Open Corpus Workbench, allowing corpus linguists to re-use existing infrastructure. An evaluation of the resulting software, CWB-treebank, shows that its performance in real world applications, such as a web query interface, compares favourably to implementations that rely on a relational database or a dedicated graph database while at the same time offering a greater expressive power for queries. An intuitive graphical interface for building the query graphs is available via the Treebank.info project.
%U http://www.lrec-conf.org/proceedings/lrec2012/pdf/709_Paper.pdf
%P 2750-2756
Markdown (Informal)
[Efficient Dependency Graph Matching with the IMS Open Corpus Workbench](http://www.lrec-conf.org/proceedings/lrec2012/pdf/709_Paper.pdf) (Proisl & Uhrig, LREC 2012)
ACL