% LREC 2010 conference paper (ACL Anthology record L10-1353).
% The booktitle abbreviates the year with an apostrophe ('10); a backtick in
% that position would be typeset by LaTeX as an opening single quote.
@inproceedings{holmqvist-2010-heuristic,
    title = "Heuristic Word Alignment with Parallel Phrases",
    author = "Holmqvist, Maria",
    editor = "Calzolari, Nicoletta and
      Choukri, Khalid and
      Maegaard, Bente and
      Mariani, Joseph and
      Odijk, Jan and
      Piperidis, Stelios and
      Rosner, Mike and
      Tapias, Daniel",
    booktitle = "Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC}'10)",
    month = may,
    year = "2010",
    address = "Valletta, Malta",
    publisher = "European Language Resources Association (ELRA)",
    url = "https://aclanthology.org/L10-1353/",
    abstract = "We present a heuristic method for word alignment, which is the task of identifying corresponding words in parallel text. The heuristic method is based on parallel phrases extracted from manually word aligned sentence pairs. Word alignment is performed by matching parallel phrases to new sentence pairs, and adding word links from the parallel phrase to words in the matching sentence segment. Experiments on an English--Swedish parallel corpus showed that the heuristic phrase-based method produced word alignments with high precision but low recall. In order to improve alignment recall, phrases were generalized by replacing words with part-of-speech categories. The generalization improved recall but at the expense of precision. Two filtering strategies were investigated to prune the large set of generalized phrases. Finally, the phrase-based method was compared to statistical word alignment with Giza++ and we found that although statistical alignments based on large datasets will outperform phrase-based word alignment, a combination of phrase-based and statistical word alignment outperformed pure statistical alignment in terms of Alignment Error Rate (AER)."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="holmqvist-2010-heuristic">
<titleInfo>
<title>Heuristic Word Alignment with Parallel Phrases</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Holmqvist</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2010-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mike</namePart>
<namePart type="family">Rosner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Valletta, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present a heuristic method for word alignment, which is the task of identifying corresponding words in parallel text. The heuristic method is based on parallel phrases extracted from manually word aligned sentence pairs. Word alignment is performed by matching parallel phrases to new sentence pairs, and adding word links from the parallel phrase to words in the matching sentence segment. Experiments on an English–Swedish parallel corpus showed that the heuristic phrase-based method produced word alignments with high precision but low recall. In order to improve alignment recall, phrases were generalized by replacing words with part-of-speech categories. The generalization improved recall but at the expense of precision. Two filtering strategies were investigated to prune the large set of generalized phrases. Finally, the phrase-based method was compared to statistical word alignment with Giza++ and we found that although statistical alignments based on large datasets will outperform phrase-based word alignment, a combination of phrase-based and statistical word alignment outperformed pure statistical alignment in terms of Alignment Error Rate (AER).</abstract>
<identifier type="citekey">holmqvist-2010-heuristic</identifier>
<location>
<url>https://aclanthology.org/L10-1353/</url>
</location>
<part>
<date>2010-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Heuristic Word Alignment with Parallel Phrases
%A Holmqvist, Maria
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Rosner, Mike
%Y Tapias, Daniel
%S Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)
%D 2010
%8 May
%I European Language Resources Association (ELRA)
%C Valletta, Malta
%F holmqvist-2010-heuristic
%X We present a heuristic method for word alignment, which is the task of identifying corresponding words in parallel text. The heuristic method is based on parallel phrases extracted from manually word aligned sentence pairs. Word alignment is performed by matching parallel phrases to new sentence pairs, and adding word links from the parallel phrase to words in the matching sentence segment. Experiments on an English–Swedish parallel corpus showed that the heuristic phrase-based method produced word alignments with high precision but low recall. In order to improve alignment recall, phrases were generalized by replacing words with part-of-speech categories. The generalization improved recall but at the expense of precision. Two filtering strategies were investigated to prune the large set of generalized phrases. Finally, the phrase-based method was compared to statistical word alignment with Giza++ and we found that although statistical alignments based on large datasets will outperform phrase-based word alignment, a combination of phrase-based and statistical word alignment outperformed pure statistical alignment in terms of Alignment Error Rate (AER).
%U https://aclanthology.org/L10-1353/
Markdown (Informal)
[Heuristic Word Alignment with Parallel Phrases](https://aclanthology.org/L10-1353/) (Holmqvist, LREC 2010)
ACL
- Maria Holmqvist. 2010. Heuristic Word Alignment with Parallel Phrases. In Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC'10), Valletta, Malta. European Language Resources Association (ELRA).