@inproceedings{larasati-2012-improving,
title = "Improving Word Alignment by Exploiting Adapted Word Similarity",
author = "Larasati, Septina Dian",
editor = "Okita, Tsuyoshi and
Sokolov, Artem and
Watanabe, Taro",
booktitle = "Workshop on Monolingual Machine Translation",
month = oct # " 28-" # nov # " 1",
year = "2012",
address = "San Diego, California, USA",
publisher = "Association for Machine Translation in the Americas",
url = "https://aclanthology.org/2012.amta-monomt.5",
abstract = "This paper presents a method to improve a word alignment model in a phrase-based Statistical Machine Translation system for a low-resourced language using a string similarity approach. Our method captures similar words that can be seen as semi-monolingual across languages, such as numbers, named entities, and adapted/loan words. We use several string similarity metrics to measure the monolinguality of the words, such as Longest Common Subsequence Ratio (LCSR), Minimum Edit Distance Ratio (MEDR), and we also use a modified BLEU Score (modBLEU). Our approach is to add intersecting alignment points for word pairs that are orthographically similar, before applying a word alignment heuristic, to generate a better word alignment. We demonstrate this approach on Indonesian-to-English translation task, where the languages share many similar words that are poorly aligned given a limited training data. This approach gives a statistically significant improvement by up to 0.66 in terms of BLEU score.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="larasati-2012-improving">
<titleInfo>
<title>Improving Word Alignment by Exploiting Adapted Word Similarity</title>
</titleInfo>
<name type="personal">
<namePart type="given">Septina</namePart>
<namePart type="given">Dian</namePart>
<namePart type="family">Larasati</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2012-oct 28-nov 1</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Workshop on Monolingual Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tsuyoshi</namePart>
<namePart type="family">Okita</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Artem</namePart>
<namePart type="family">Sokolov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Taro</namePart>
<namePart type="family">Watanabe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Machine Translation in the Americas</publisher>
<place>
<placeTerm type="text">San Diego, California, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents a method to improve a word alignment model in a phrase-based Statistical Machine Translation system for a low-resourced language using a string similarity approach. Our method captures similar words that can be seen as semi-monolingual across languages, such as numbers, named entities, and adapted/loan words. We use several string similarity metrics to measure the monolinguality of the words, such as Longest Common Subsequence Ratio (LCSR), Minimum Edit Distance Ratio (MEDR), and we also use a modified BLEU Score (modBLEU). Our approach is to add intersecting alignment points for word pairs that are orthographically similar, before applying a word alignment heuristic, to generate a better word alignment. We demonstrate this approach on Indonesian-to-English translation task, where the languages share many similar words that are poorly aligned given a limited training data. This approach gives a statistically significant improvement by up to 0.66 in terms of BLEU score.</abstract>
<identifier type="citekey">larasati-2012-improving</identifier>
<location>
<url>https://aclanthology.org/2012.amta-monomt.5</url>
</location>
<part>
<date>2012-oct 28-nov 1</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Improving Word Alignment by Exploiting Adapted Word Similarity
%A Larasati, Septina Dian
%Y Okita, Tsuyoshi
%Y Sokolov, Artem
%Y Watanabe, Taro
%S Workshop on Monolingual Machine Translation
%D 2012
%8 oct 28 nov 1
%I Association for Machine Translation in the Americas
%C San Diego, California, USA
%F larasati-2012-improving
%X This paper presents a method to improve a word alignment model in a phrase-based Statistical Machine Translation system for a low-resourced language using a string similarity approach. Our method captures similar words that can be seen as semi-monolingual across languages, such as numbers, named entities, and adapted/loan words. We use several string similarity metrics to measure the monolinguality of the words, such as Longest Common Subsequence Ratio (LCSR), Minimum Edit Distance Ratio (MEDR), and we also use a modified BLEU Score (modBLEU). Our approach is to add intersecting alignment points for word pairs that are orthographically similar, before applying a word alignment heuristic, to generate a better word alignment. We demonstrate this approach on Indonesian-to-English translation task, where the languages share many similar words that are poorly aligned given a limited training data. This approach gives a statistically significant improvement by up to 0.66 in terms of BLEU score.
%U https://aclanthology.org/2012.amta-monomt.5
Markdown (Informal)
[Improving Word Alignment by Exploiting Adapted Word Similarity](https://aclanthology.org/2012.amta-monomt.5) (Larasati, AMTA 2012)
ACL