@inproceedings{kuhn-jellinghaus-2006-multilingual,
title = "Multilingual parallel treebanking: a lean and flexible approach",
author = "Kuhn, Jonas and
Jellinghaus, Michael",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Gangemi, Aldo and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Tapias, Daniel",
booktitle = "Proceedings of the Fifth International Conference on Language Resources and Evaluation ({LREC}`06)",
month = may,
year = "2006",
address = "Genoa, Italy",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L06-1386/",
abstract = "We propose a bootstrapping approach to creating a phrase-level alignment over a sentence-aligned parallel corpus, reporting concrete treebank annotation work performed on a sample of sentence tuples from the Europarl corpus, currently for English, French, German, and Spanish. The manually annotated seed data will be used as the basis for automatically labelling the rest of the corpus. Some preliminary experiments addressing the bootstrapping aspects are presented. The representation format for syntactic correspondence across parallel text that we propose as the starting point for a process of successive refinement emphasizes correspondences of major constituents that realize semantic arguments or modifiers; language-particular details of morphosyntactic realization are intentionally left largely unlabelled. We believe this format is a good basis for training NLPtools for multilingual application contexts in which consistency across languages is more central than fine-grained details in specific languages (in particular, syntax-based statistical Machine Translation)."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kuhn-jellinghaus-2006-multilingual">
<titleInfo>
<title>Multilingual parallel treebanking: a lean and flexible approach</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jonas</namePart>
<namePart type="family">Kuhn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Jellinghaus</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2006-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC‘06)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aldo</namePart>
<namePart type="family">Gangemi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Genoa, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We propose a bootstrapping approach to creating a phrase-level alignment over a sentence-aligned parallel corpus, reporting concrete treebank annotation work performed on a sample of sentence tuples from the Europarl corpus, currently for English, French, German, and Spanish. The manually annotated seed data will be used as the basis for automatically labelling the rest of the corpus. Some preliminary experiments addressing the bootstrapping aspects are presented. The representation format for syntactic correspondence across parallel text that we propose as the starting point for a process of successive refinement emphasizes correspondences of major constituents that realize semantic arguments or modifiers; language-particular details of morphosyntactic realization are intentionally left largely unlabelled. We believe this format is a good basis for training NLPtools for multilingual application contexts in which consistency across languages is more central than fine-grained details in specific languages (in particular, syntax-based statistical Machine Translation).</abstract>
<identifier type="citekey">kuhn-jellinghaus-2006-multilingual</identifier>
<location>
<url>https://aclanthology.org/L06-1386/</url>
</location>
<part>
<date>2006-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Multilingual parallel treebanking: a lean and flexible approach
%A Kuhn, Jonas
%A Jellinghaus, Michael
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Gangemi, Aldo
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Tapias, Daniel
%S Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC‘06)
%D 2006
%8 May
%I European Language Resources Association (ELRA)
%C Genoa, Italy
%F kuhn-jellinghaus-2006-multilingual
%X We propose a bootstrapping approach to creating a phrase-level alignment over a sentence-aligned parallel corpus, reporting concrete treebank annotation work performed on a sample of sentence tuples from the Europarl corpus, currently for English, French, German, and Spanish. The manually annotated seed data will be used as the basis for automatically labelling the rest of the corpus. Some preliminary experiments addressing the bootstrapping aspects are presented. The representation format for syntactic correspondence across parallel text that we propose as the starting point for a process of successive refinement emphasizes correspondences of major constituents that realize semantic arguments or modifiers; language-particular details of morphosyntactic realization are intentionally left largely unlabelled. We believe this format is a good basis for training NLPtools for multilingual application contexts in which consistency across languages is more central than fine-grained details in specific languages (in particular, syntax-based statistical Machine Translation).
%U https://aclanthology.org/L06-1386/
Markdown (Informal)
[Multilingual parallel treebanking: a lean and flexible approach](https://aclanthology.org/L06-1386/) (Kuhn & Jellinghaus, LREC 2006)
ACL