@inproceedings{murakami-etal-2008-statistical,
title = "Statistical machine translation without long parallel sentences for training data.",
author = "Murakami, Jin{'}ichi and
Tokuhisa, Masato and
Ikehara, Satoru",
booktitle = "Proceedings of the 5th International Workshop on Spoken Language Translation: Evaluation Campaign",
month = oct # " 20-21",
year = "2008",
address = "Waikiki, Hawaii",
url = "https://aclanthology.org/2008.iwslt-evaluation.19",
pages = "132--137",
abstract = "In this study, we paid attention to the reliability of phrase table. We have been used the phrase table using Och{'}s method[2]. And this method sometimes generate completely wrong phrase tables. We found that such phrase table caused by long parallel sentences. Therefore, we removed these long parallel sentences from training data. Also, we utilized general tools for statistical machine translation, such as {''}Giza++{''}[3], {''}moses{''}[4], and {''}training-phrase-model.perl{''}[5]. We obtained a BLEU score of 0.4047 (TEXT) and 0.3553(1-BEST) of the Challenge-EC task for our proposed method. On the other hand, we obtained a BLEU score of 0.3975(TEXT) and 0.3482(1-BEST) of the Challenge-EC task for a standard method. This means that our proposed method was effective for the Challenge-EC task. However, it was not effective for the BTECT-CE and Challenge-CE tasks. And our system was not good performance. For example, our system was the 7th place among 8 system for Challenge-EC task.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="murakami-etal-2008-statistical">
<titleInfo>
<title>Statistical machine translation without long parallel sentences for training data.</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jin’ichi</namePart>
<namePart type="family">Murakami</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Masato</namePart>
<namePart type="family">Tokuhisa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Satoru</namePart>
<namePart type="family">Ikehara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2008-10-20/2008-10-21</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 5th International Workshop on Spoken Language Translation: Evaluation Campaign</title>
</titleInfo>
<originInfo>
<place>
<placeTerm type="text">Waikiki, Hawaii</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this study, we paid attention to the reliability of phrase table. We have been used the phrase table using Och’s method[2]. And this method sometimes generate completely wrong phrase tables. We found that such phrase table caused by long parallel sentences. Therefore, we removed these long parallel sentences from training data. Also, we utilized general tools for statistical machine translation, such as “Giza++”[3], “moses”[4], and “training-phrase-model.perl”[5]. We obtained a BLEU score of 0.4047 (TEXT) and 0.3553(1-BEST) of the Challenge-EC task for our proposed method. On the other hand, we obtained a BLEU score of 0.3975(TEXT) and 0.3482(1-BEST) of the Challenge-EC task for a standard method. This means that our proposed method was effective for the Challenge-EC task. However, it was not effective for the BTECT-CE and Challenge-CE tasks. And our system was not good performance. For example, our system was the 7th place among 8 system for Challenge-EC task.</abstract>
<identifier type="citekey">murakami-etal-2008-statistical</identifier>
<location>
<url>https://aclanthology.org/2008.iwslt-evaluation.19</url>
</location>
<part>
<date>2008-10-20/2008-10-21</date>
<extent unit="page">
<start>132</start>
<end>137</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Statistical machine translation without long parallel sentences for training data.
%A Murakami, Jin’ichi
%A Tokuhisa, Masato
%A Ikehara, Satoru
%S Proceedings of the 5th International Workshop on Spoken Language Translation: Evaluation Campaign
%D 2008
%8 October 20-21
%C Waikiki, Hawaii
%F murakami-etal-2008-statistical
%X In this study, we paid attention to the reliability of phrase table. We have been used the phrase table using Och’s method[2]. And this method sometimes generate completely wrong phrase tables. We found that such phrase table caused by long parallel sentences. Therefore, we removed these long parallel sentences from training data. Also, we utilized general tools for statistical machine translation, such as “Giza++”[3], “moses”[4], and “training-phrase-model.perl”[5]. We obtained a BLEU score of 0.4047 (TEXT) and 0.3553(1-BEST) of the Challenge-EC task for our proposed method. On the other hand, we obtained a BLEU score of 0.3975(TEXT) and 0.3482(1-BEST) of the Challenge-EC task for a standard method. This means that our proposed method was effective for the Challenge-EC task. However, it was not effective for the BTECT-CE and Challenge-CE tasks. And our system was not good performance. For example, our system was the 7th place among 8 system for Challenge-EC task.
%U https://aclanthology.org/2008.iwslt-evaluation.19
%P 132-137
Markdown (Informal)
[Statistical machine translation without long parallel sentences for training data.](https://aclanthology.org/2008.iwslt-evaluation.19) (Murakami et al., IWSLT 2008)
ACL