@inproceedings{weller-heid-2012-analyzing,
title = "Analyzing and Aligning {G}erman compound nouns",
author = "Weller, Marion and
Heid, Ulrich",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Do{\u{g}}an, Mehmet U{\u{g}}ur and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12)",
month = may,
year = "2012",
address = "Istanbul, Turkey",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2012/pdf/817_Paper.pdf",
pages = "2395--2400",
abstract = "In this paper, we present and evaluate an approach for the compositional alignment of compound nouns using comparable corpora from technical domains. The task of term alignment consists in relating a source language term to its translation in a list of target language terms with the help of a bilingual dictionary. Compound splitting allows to transform a compound into a sequence of components which can be translated separately and then related to multi-word target language terms. We present and evaluate a method for compound splitting, and compare two strategies for term alignment (bag-of-word vs. pattern-based). The simple word-based approach leads to a considerable amount of erroneous alignments, whereas the pattern-based approach reaches a decent precision. We also assess the reasons for alignment failures: in the comparable corpora used for our experiments, a substantial number of terms has no translation in the target language data; furthermore, the non-isomorphic structures of source and target language terms cause alignment failures in many cases.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="weller-heid-2012-analyzing">
<titleInfo>
<title>Analyzing and Aligning German compound nouns</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marion</namePart>
<namePart type="family">Weller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ulrich</namePart>
<namePart type="family">Heid</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2012-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mehmet</namePart>
<namePart type="given">Uğur</namePart>
<namePart type="family">Doğan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Istanbul, Turkey</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we present and evaluate an approach for the compositional alignment of compound nouns using comparable corpora from technical domains. The task of term alignment consists in relating a source language term to its translation in a list of target language terms with the help of a bilingual dictionary. Compound splitting allows to transform a compound into a sequence of components which can be translated separately and then related to multi-word target language terms. We present and evaluate a method for compound splitting, and compare two strategies for term alignment (bag-of-word vs. pattern-based). The simple word-based approach leads to a considerable amount of erroneous alignments, whereas the pattern-based approach reaches a decent precision. We also assess the reasons for alignment failures: in the comparable corpora used for our experiments, a substantial number of terms has no translation in the target language data; furthermore, the non-isomorphic structures of source and target language terms cause alignment failures in many cases.</abstract>
<identifier type="citekey">weller-heid-2012-analyzing</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/817_Paper.pdf</url>
</location>
<part>
<date>2012-05</date>
<extent unit="page">
<start>2395</start>
<end>2400</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Analyzing and Aligning German compound nouns
%A Weller, Marion
%A Heid, Ulrich
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Doğan, Mehmet Uğur
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)
%D 2012
%8 May
%I European Language Resources Association (ELRA)
%C Istanbul, Turkey
%F weller-heid-2012-analyzing
%X In this paper, we present and evaluate an approach for the compositional alignment of compound nouns using comparable corpora from technical domains. The task of term alignment consists in relating a source language term to its translation in a list of target language terms with the help of a bilingual dictionary. Compound splitting allows to transform a compound into a sequence of components which can be translated separately and then related to multi-word target language terms. We present and evaluate a method for compound splitting, and compare two strategies for term alignment (bag-of-word vs. pattern-based). The simple word-based approach leads to a considerable amount of erroneous alignments, whereas the pattern-based approach reaches a decent precision. We also assess the reasons for alignment failures: in the comparable corpora used for our experiments, a substantial number of terms has no translation in the target language data; furthermore, the non-isomorphic structures of source and target language terms cause alignment failures in many cases.
%U http://www.lrec-conf.org/proceedings/lrec2012/pdf/817_Paper.pdf
%P 2395-2400
Markdown (Informal)
[Analyzing and Aligning German compound nouns](http://www.lrec-conf.org/proceedings/lrec2012/pdf/817_Paper.pdf) (Weller & Heid, LREC 2012)
ACL
- Marion Weller and Ulrich Heid. 2012. Analyzing and Aligning German compound nouns. In Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12), pages 2395–2400, Istanbul, Turkey. European Language Resources Association (ELRA).