@inproceedings{oshika-etal-2024-simplifying,
title = "Simplifying Translations for Children: Iterative Simplification Considering Age of Acquisition with {LLM}s",
author = "Oshika, Masashi and
Morishita, Makoto and
Hirao, Tsutomu and
Sasano, Ryohei and
Takeda, Koichi",
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2024",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-acl.506/",
doi = "10.18653/v1/2024.findings-acl.506",
pages = "8567--8577",
abstract = "In recent years, neural machine translation (NMT) has become widely used in everyday life. However, the current NMT lacks a mechanism to adjust the difficulty level of translations to match the user{'}s language level. Additionally, due to the bias in the training data for NMT, translations of simple source sentences are often produced with complex words. In particular, this could pose a problem for children, who may not be able to understand the meaning of the translations correctly. In this study, we propose a method that replaces high Age of Acquisitions (AoA) words in translations with simpler words to match the translations to the user{'}s level. We achieve this by using large language models (LLMs), providing a triple of a source sentence, a translation, and a target word to be replaced. We create a benchmark dataset using back-translation on Simple English Wikipedia. The experimental results obtained from the dataset show that our method effectively replaces high-AoA words with lower-AoA words and, moreover, can iteratively replace most of the high-AoA words while still maintaining high BLEU and COMET scores."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="oshika-etal-2024-simplifying">
<titleInfo>
<title>Simplifying Translations for Children: Iterative Simplification Considering Age of Acquisition with LLMs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Masashi</namePart>
<namePart type="family">Oshika</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Makoto</namePart>
<namePart type="family">Morishita</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tsutomu</namePart>
<namePart type="family">Hirao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryohei</namePart>
<namePart type="family">Sasano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Koichi</namePart>
<namePart type="family">Takeda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Srikumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In recent years, neural machine translation (NMT) has become widely used in everyday life. However, the current NMT lacks a mechanism to adjust the difficulty level of translations to match the user’s language level. Additionally, due to the bias in the training data for NMT, translations of simple source sentences are often produced with complex words. In particular, this could pose a problem for children, who may not be able to understand the meaning of the translations correctly. In this study, we propose a method that replaces high Age of Acquisitions (AoA) words in translations with simpler words to match the translations to the user’s level. We achieve this by using large language models (LLMs), providing a triple of a source sentence, a translation, and a target word to be replaced. We create a benchmark dataset using back-translation on Simple English Wikipedia. The experimental results obtained from the dataset show that our method effectively replaces high-AoA words with lower-AoA words and, moreover, can iteratively replace most of the high-AoA words while still maintaining high BLEU and COMET scores.</abstract>
<identifier type="citekey">oshika-etal-2024-simplifying</identifier>
<identifier type="doi">10.18653/v1/2024.findings-acl.506</identifier>
<location>
<url>https://aclanthology.org/2024.findings-acl.506/</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>8567</start>
<end>8577</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Simplifying Translations for Children: Iterative Simplification Considering Age of Acquisition with LLMs
%A Oshika, Masashi
%A Morishita, Makoto
%A Hirao, Tsutomu
%A Sasano, Ryohei
%A Takeda, Koichi
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Findings of the Association for Computational Linguistics: ACL 2024
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F oshika-etal-2024-simplifying
%X In recent years, neural machine translation (NMT) has become widely used in everyday life. However, the current NMT lacks a mechanism to adjust the difficulty level of translations to match the user’s language level. Additionally, due to the bias in the training data for NMT, translations of simple source sentences are often produced with complex words. In particular, this could pose a problem for children, who may not be able to understand the meaning of the translations correctly. In this study, we propose a method that replaces high Age of Acquisitions (AoA) words in translations with simpler words to match the translations to the user’s level. We achieve this by using large language models (LLMs), providing a triple of a source sentence, a translation, and a target word to be replaced. We create a benchmark dataset using back-translation on Simple English Wikipedia. The experimental results obtained from the dataset show that our method effectively replaces high-AoA words with lower-AoA words and, moreover, can iteratively replace most of the high-AoA words while still maintaining high BLEU and COMET scores.
%R 10.18653/v1/2024.findings-acl.506
%U https://aclanthology.org/2024.findings-acl.506/
%U https://doi.org/10.18653/v1/2024.findings-acl.506
%P 8567-8577
Markdown (Informal)
[Simplifying Translations for Children: Iterative Simplification Considering Age of Acquisition with LLMs](https://aclanthology.org/2024.findings-acl.506/) (Oshika et al., Findings 2024)
ACL
Masashi Oshika, Makoto Morishita, Tsutomu Hirao, Ryohei Sasano, and Koichi Takeda. 2024. Simplifying Translations for Children: Iterative Simplification Considering Age of Acquisition with LLMs. In Findings of the Association for Computational Linguistics: ACL 2024, pages 8567–8577, Bangkok, Thailand. Association for Computational Linguistics.
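The abstract above describes an iterative procedure: find the highest-AoA word remaining in a translation and prompt an LLM with the triple (source sentence, current translation, target word) to replace it, repeating until the text matches the reader's level. The following is a minimal illustrative sketch of that loop under stated assumptions, not the authors' implementation: the toy AoA lexicon, the threshold value, and the `llm_rewrite` stub are hypothetical stand-ins for a published AoA norm list and an actual LLM prompt.

```python
# Illustrative sketch of iterative AoA-based simplification (not the paper's code).
# AoA values below are made up; real usage would load a published AoA norm list
# and back llm_rewrite() with an LLM prompted on the (source, translation, word) triple.

AOA = {
    "purchase": 9.5, "buy": 4.2,      # toy Age-of-Acquisition values (years)
    "automobile": 8.7, "car": 3.9,
}
AOA_THRESHOLD = 6.0   # hypothetical cutoff for words considered too hard for children
MAX_ITERS = 10        # safety bound on the number of replacement rounds


def llm_rewrite(source: str, translation: str, target_word: str) -> str:
    """Stand-in for an LLM call given the (source, translation, target word) triple;
    here we simply substitute a simpler synonym from a toy table."""
    simpler = {"purchase": "buy", "automobile": "car"}
    return translation.replace(target_word, simpler.get(target_word, target_word))


def simplify(source: str, translation: str) -> str:
    """Iteratively replace the highest-AoA word until none exceeds the threshold."""
    for _ in range(MAX_ITERS):
        words = [w.strip(".,!?").lower() for w in translation.split()]
        hard = [w for w in words if AOA.get(w, 0.0) > AOA_THRESHOLD]
        if not hard:
            break  # every remaining word is at or below the target AoA level
        target = max(hard, key=lambda w: AOA[w])      # hardest word first
        translation = llm_rewrite(source, translation, target)
    return translation


if __name__ == "__main__":
    src = "彼はその自動車を購入することに決めた。"           # example source sentence
    mt = "He decided to purchase the automobile."       # example NMT output
    print(simplify(src, mt))  # -> "He decided to buy the car."
```

In the paper itself the replacement is performed by a large language model rather than a synonym table, and output quality is checked against the benchmark built by back-translating Simple English Wikipedia, with BLEU and COMET used to confirm that meaning is preserved across iterations.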