@inproceedings{ahrenberg-2010-alignment,
title = "Alignment-based Profiling of {E}uroparl Data in an {E}nglish-{S}wedish Parallel Corpus",
author = "Ahrenberg, Lars",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Rosner, Mike and
Tapias, Daniel",
booktitle = "Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC}'10)",
month = may,
year = "2010",
address = "Valletta, Malta",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2010/pdf/193_Paper.pdf",
abstract = "This paper profiles the Europarl part of an English-Swedish parallel corpus and compares it with three other subcorpora of the same parallel corpus. We first describe our method for comparison which is based on manually reviewed word alignments. We investigate relative frequencies of different types of correspondence, including null alignments, many-to-one correspondences and crossings. In addition, both halves of the parallel corpus have been annotated with morpho-syntactic information. The syntactic annotation uses labelled dependency relations. Thus, we can see how different types of correspondences are distributed on different parts-of-speech and compute correspondences at the structural level. In spite of the fact that two of the other subcorpora contain fiction, it is found that the Europarl part is the one having the highest proportion of many types of restructurings, including additions, deletions, long distance reorderings and dependency reversals. We explain this by the fact that the majority of Europarl segments are parallel translations rather than source texts and their translations.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ahrenberg-2010-alignment">
<titleInfo>
<title>Alignment-based Profiling of Europarl Data in an English-Swedish Parallel Corpus</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lars</namePart>
<namePart type="family">Ahrenberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2010-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mike</namePart>
<namePart type="family">Rosner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Valletta, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper profiles the Europarl part of an English-Swedish parallel corpus and compares it with three other subcorpora of the same parallel corpus. We first describe our method for comparison which is based on manually reviewed word alignments. We investigate relative frequencies of different types of correspondence, including null alignments, many-to-one correspondences and crossings. In addition, both halves of the parallel corpus have been annotated with morpho-syntactic information. The syntactic annotation uses labelled dependency relations. Thus, we can see how different types of correspondences are distributed on different parts-of-speech and compute correspondences at the structural level. In spite of the fact that two of the other subcorpora contain fiction, it is found that the Europarl part is the one having the highest proportion of many types of restructurings, including additions, deletions, long distance reorderings and dependency reversals. We explain this by the fact that the majority of Europarl segments are parallel translations rather than source texts and their translations.</abstract>
<identifier type="citekey">ahrenberg-2010-alignment</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/193_Paper.pdf</url>
</location>
<part>
<date>2010-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Alignment-based Profiling of Europarl Data in an English-Swedish Parallel Corpus
%A Ahrenberg, Lars
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Rosner, Mike
%Y Tapias, Daniel
%S Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)
%D 2010
%8 May
%I European Language Resources Association (ELRA)
%C Valletta, Malta
%F ahrenberg-2010-alignment
%X This paper profiles the Europarl part of an English-Swedish parallel corpus and compares it with three other subcorpora of the same parallel corpus. We first describe our method for comparison which is based on manually reviewed word alignments. We investigate relative frequencies of different types of correspondence, including null alignments, many-to-one correspondences and crossings. In addition, both halves of the parallel corpus have been annotated with morpho-syntactic information. The syntactic annotation uses labelled dependency relations. Thus, we can see how different types of correspondences are distributed on different parts-of-speech and compute correspondences at the structural level. In spite of the fact that two of the other subcorpora contain fiction, it is found that the Europarl part is the one having the highest proportion of many types of restructurings, including additions, deletions, long distance reorderings and dependency reversals. We explain this by the fact that the majority of Europarl segments are parallel translations rather than source texts and their translations.
%U http://www.lrec-conf.org/proceedings/lrec2010/pdf/193_Paper.pdf
Markdown (Informal)
[Alignment-based Profiling of Europarl Data in an English-Swedish Parallel Corpus](http://www.lrec-conf.org/proceedings/lrec2010/pdf/193_Paper.pdf) (Ahrenberg, LREC 2010)
ACL