% Conference paper from the LREC'08 proceedings (Marrakech, May 2008).
% Field values are brace-delimited; the month uses the predefined macro.
@inproceedings{spreyer-etal-2008-identification,
  title     = {Identification of Comparable Argument-Head Relations in Parallel Corpora},
  author    = {Spreyer, Kathrin and
               Kuhn, Jonas and
               Schrader, Bettina},
  editor    = {Calzolari, Nicoletta and
               Choukri, Khalid and
               Maegaard, Bente and
               Mariani, Joseph and
               Odijk, Jan and
               Piperidis, Stelios and
               Tapias, Daniel},
  booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)},
  month     = may,
  year      = {2008},
  address   = {Marrakech, Morocco},
  publisher = {European Language Resources Association (ELRA)},
  url       = {http://www.lrec-conf.org/proceedings/lrec2008/pdf/475_paper.pdf},
  abstract  = {We present the machine learning framework that we are developing, in order to support explorative search for non-trivial linguistic configurations in low-density languages (languages with no or few NLP tools). The approach exploits advanced existing analysis tools for high-density languages and word-aligned multi-parallel corpora to bridge across languages. The goal is to find a methodology that minimizes the amount of human expert intervention needed, while producing high-quality search and annotation tools. One of the main challenges is the susceptibility of a complex system combining various automatic analysis components to hard-to-control noise from a number of sources. We present systematic experiments investigating to what degree the noise issue can be overcome by (i) exploiting more than one perspective on the target language data by considering multiple translations in the parallel corpus, and (ii) using minimally supervised learning techniques such as co-training and self-training to take advantage of a larger pool of data for generalization. We observe that while (i) does help in the training individual machine learning models, a cyclic bootstrapping process seems to suffer too much from noise. A preliminary conclusion is that in a practical approach, one has to rely on a higher degree of supervision or on noise detection heuristics.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier further down. -->
  <mods ID="spreyer-etal-2008-identification">
    <titleInfo>
      <title>Identification of Comparable Argument-Head Relations in Parallel Corpora</title>
    </titleInfo>
    <!-- Authors of the paper, in citation order. -->
    <name type="personal">
      <namePart type="given">Kathrin</namePart>
      <namePart type="family">Spreyer</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jonas</namePart>
      <namePart type="family">Kuhn</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Bettina</namePart>
      <namePart type="family">Schrader</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2008-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Khalid</namePart>
        <namePart type="family">Choukri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Bente</namePart>
        <namePart type="family">Maegaard</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joseph</namePart>
        <namePart type="family">Mariani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jan</namePart>
        <namePart type="family">Odijk</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stelios</namePart>
        <namePart type="family">Piperidis</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Daniel</namePart>
        <namePart type="family">Tapias</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>European Language Resources Association (ELRA)</publisher>
        <place>
          <placeTerm type="text">Marrakech, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We present the machine learning framework that we are developing, in order to support explorative search for non-trivial linguistic configurations in low-density languages (languages with no or few NLP tools). The approach exploits advanced existing analysis tools for high-density languages and word-aligned multi-parallel corpora to bridge across languages. The goal is to find a methodology that minimizes the amount of human expert intervention needed, while producing high-quality search and annotation tools. One of the main challenges is the susceptibility of a complex system combining various automatic analysis components to hard-to-control noise from a number of sources. We present systematic experiments investigating to what degree the noise issue can be overcome by (i) exploiting more than one perspective on the target language data by considering multiple translations in the parallel corpus, and (ii) using minimally supervised learning techniques such as co-training and self-training to take advantage of a larger pool of data for generalization. We observe that while (i) does help in the training individual machine learning models, a cyclic bootstrapping process seems to suffer too much from noise. A preliminary conclusion is that in a practical approach, one has to rely on a higher degree of supervision or on noise detection heuristics.</abstract>
    <identifier type="citekey">spreyer-etal-2008-identification</identifier>
    <location>
      <url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/475_paper.pdf</url>
    </location>
    <part>
      <date>2008-05</date>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Identification of Comparable Argument-Head Relations in Parallel Corpora
%A Spreyer, Kathrin
%A Kuhn, Jonas
%A Schrader, Bettina
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Tapias, Daniel
%S Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)
%D 2008
%8 May
%I European Language Resources Association (ELRA)
%C Marrakech, Morocco
%F spreyer-etal-2008-identification
%X We present the machine learning framework that we are developing, in order to support explorative search for non-trivial linguistic configurations in low-density languages (languages with no or few NLP tools). The approach exploits advanced existing analysis tools for high-density languages and word-aligned multi-parallel corpora to bridge across languages. The goal is to find a methodology that minimizes the amount of human expert intervention needed, while producing high-quality search and annotation tools. One of the main challenges is the susceptibility of a complex system combining various automatic analysis components to hard-to-control noise from a number of sources. We present systematic experiments investigating to what degree the noise issue can be overcome by (i) exploiting more than one perspective on the target language data by considering multiple translations in the parallel corpus, and (ii) using minimally supervised learning techniques such as co-training and self-training to take advantage of a larger pool of data for generalization. We observe that while (i) does help in the training individual machine learning models, a cyclic bootstrapping process seems to suffer too much from noise. A preliminary conclusion is that in a practical approach, one has to rely on a higher degree of supervision or on noise detection heuristics.
%U http://www.lrec-conf.org/proceedings/lrec2008/pdf/475_paper.pdf
Markdown (Informal)
[Identification of Comparable Argument-Head Relations in Parallel Corpora](http://www.lrec-conf.org/proceedings/lrec2008/pdf/475_paper.pdf) (Spreyer et al., LREC 2008)
ACL