@inproceedings{tamura-etal-2023-target,
title = "Target Language Monolingual Translation Memory based {NMT} by Cross-lingual Retrieval of Similar Translations and Reranking",
author = "Tamura, Takuya and
Wang, Xiaotian and
Utsuro, Takehito and
Nagata, Masaaki",
editor = "Utiyama, Masao and
Wang, Rui",
booktitle = "Proceedings of Machine Translation Summit XIX, Vol. 1: Research Track",
month = sep,
year = "2023",
address = "Macau SAR, China",
publisher = "Asia-Pacific Association for Machine Translation",
url = "https://aclanthology.org/2023.mtsummit-research.26",
pages = "313--323",
abstract = "Retrieve-edit-rerank is a text generation framework composed of three steps: retrieving for sentences using the input sentence as a query, generating multiple output sentence candidates, and selecting the final output sentence from these candidates. This simple approach has outperformed other existing and more complex methods. This paper focuses on the retrieving and the reranking steps. In the retrieving step, we propose retrieving similar target language sentences from a target language monolingual translation memory using language-independent sentence embeddings generated by mSBERT or LaBSE. We demonstrate that this approach significantly outperforms existing methods that use monolingual inter-sentence similarity measures such as edit distance, which is only applicable to a parallel translation memory. In the reranking step, we propose a new reranking score for selecting the best sentences, which considers both the log-likelihood of each candidate and the sentence embeddings based similarity between the input and the candidate. We evaluated the proposed method for English-to-Japanese translation on the ASPEC and English-to-French translation on the EU Bookshop Corpus (EUBC). The proposed method significantly exceeded the baseline in BLEU score, especially observing a 1.4-point improvement in the EUBC dataset over the original Retrieve-Edit-Rerank method.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tamura-etal-2023-target">
<titleInfo>
<title>Target Language Monolingual Translation Memory based NMT by Cross-lingual Retrieval of Similar Translations and Reranking</title>
</titleInfo>
<name type="personal">
<namePart type="given">Takuya</namePart>
<namePart type="family">Tamura</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaotian</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Takehito</namePart>
<namePart type="family">Utsuro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Masaaki</namePart>
<namePart type="family">Nagata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of Machine Translation Summit XIX, Vol. 1: Research Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Masao</namePart>
<namePart type="family">Utiyama</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rui</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Asia-Pacific Association for Machine Translation</publisher>
<place>
<placeTerm type="text">Macau SAR, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Retrieve-edit-rerank is a text generation framework composed of three steps: retrieving for sentences using the input sentence as a query, generating multiple output sentence candidates, and selecting the final output sentence from these candidates. This simple approach has outperformed other existing and more complex methods. This paper focuses on the retrieving and the reranking steps. In the retrieving step, we propose retrieving similar target language sentences from a target language monolingual translation memory using language-independent sentence embeddings generated by mSBERT or LaBSE. We demonstrate that this approach significantly outperforms existing methods that use monolingual inter-sentence similarity measures such as edit distance, which is only applicable to a parallel translation memory. In the reranking step, we propose a new reranking score for selecting the best sentences, which considers both the log-likelihood of each candidate and the sentence embeddings based similarity between the input and the candidate. We evaluated the proposed method for English-to-Japanese translation on the ASPEC and English-to-French translation on the EU Bookshop Corpus (EUBC). The proposed method significantly exceeded the baseline in BLEU score, especially observing a 1.4-point improvement in the EUBC dataset over the original Retrieve-Edit-Rerank method.</abstract>
<identifier type="citekey">tamura-etal-2023-target</identifier>
<location>
<url>https://aclanthology.org/2023.mtsummit-research.26</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>313</start>
<end>323</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Target Language Monolingual Translation Memory based NMT by Cross-lingual Retrieval of Similar Translations and Reranking
%A Tamura, Takuya
%A Wang, Xiaotian
%A Utsuro, Takehito
%A Nagata, Masaaki
%Y Utiyama, Masao
%Y Wang, Rui
%S Proceedings of Machine Translation Summit XIX, Vol. 1: Research Track
%D 2023
%8 September
%I Asia-Pacific Association for Machine Translation
%C Macau SAR, China
%F tamura-etal-2023-target
%X Retrieve-edit-rerank is a text generation framework composed of three steps: retrieving for sentences using the input sentence as a query, generating multiple output sentence candidates, and selecting the final output sentence from these candidates. This simple approach has outperformed other existing and more complex methods. This paper focuses on the retrieving and the reranking steps. In the retrieving step, we propose retrieving similar target language sentences from a target language monolingual translation memory using language-independent sentence embeddings generated by mSBERT or LaBSE. We demonstrate that this approach significantly outperforms existing methods that use monolingual inter-sentence similarity measures such as edit distance, which is only applicable to a parallel translation memory. In the reranking step, we propose a new reranking score for selecting the best sentences, which considers both the log-likelihood of each candidate and the sentence embeddings based similarity between the input and the candidate. We evaluated the proposed method for English-to-Japanese translation on the ASPEC and English-to-French translation on the EU Bookshop Corpus (EUBC). The proposed method significantly exceeded the baseline in BLEU score, especially observing a 1.4-point improvement in the EUBC dataset over the original Retrieve-Edit-Rerank method.
%U https://aclanthology.org/2023.mtsummit-research.26
%P 313-323
Markdown (Informal)
[Target Language Monolingual Translation Memory based NMT by Cross-lingual Retrieval of Similar Translations and Reranking](https://aclanthology.org/2023.mtsummit-research.26) (Tamura et al., MTSummit 2023)
ACL