@inproceedings{liu-etal-2024-cloudsheep,
title = "{C}loud{S}heep System for {WMT}24 Discourse-Level Literary Translation",
author = "Liu, Lisa and
Liu, Ryan and
Tsai, Angela and
Shang, Jingbo",
editor = "Haddow, Barry and
Kocmi, Tom and
Koehn, Philipp and
Monz, Christof",
booktitle = "Proceedings of the Ninth Conference on Machine Translation",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.wmt-1.95",
pages = "960--966",
abstract = "This paper describes the CloudSheep translation system for WMT24 Discourse-Level Literary Translation shared task. We participated in the Chinese-English direction on the unconstrained track. Our approach to the task used a pipeline of different tools in order to maximize the translation accuracy and flow of the text by combining the strengths of each tool. In particular, our focus was to translate names consistently and idioms correctly. To achieve consistent names throughout a text, a custom name dictionary was generated for each text, containing person and place names, along with their translations. A common honorific dictionary was applied for consistency with titles, especially in historical or cultivation novels. The names were found and translated with GPT 3.5-turbo. To achieve accurate and concise translations of idioms, which are often translated literally and verbosely, we integrated the CC-CEDICT library to provide official definitions. Then, we used GPT-4 to pick the best dictionary definition that fit the context and rephrase it to fit grammatically within a sentence. For the translation of non-name and non-idiom terms, we used Google Translate. We compared our approach{'}s performance with Google Translate as a baseline using BLEU, chrF, and COMET, as well as A/B testing.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="liu-etal-2024-cloudsheep">
<titleInfo>
<title>CloudSheep System for WMT24 Discourse-Level Literary Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lisa</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Angela</namePart>
<namePart type="family">Tsai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jingbo</namePart>
<namePart type="family">Shang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth Conference on Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Barry</namePart>
<namePart type="family">Haddow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="family">Kocmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Koehn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christof</namePart>
<namePart type="family">Monz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes the CloudSheep translation system for WMT24 Discourse-Level Literary Translation shared task. We participated in the Chinese-English direction on the unconstrained track. Our approach to the task used a pipeline of different tools in order to maximize the translation accuracy and flow of the text by combining the strengths of each tool. In particular, our focus was to translate names consistently and idioms correctly. To achieve consistent names throughout a text, a custom name dictionary was generated for each text, containing person and place names, along with their translations. A common honorific dictionary was applied for consistency with titles, especially in historical or cultivation novels. The names were found and translated with GPT 3.5-turbo. To achieve accurate and concise translations of idioms, which are often translated literally and verbosely, we integrated the CC-CEDICT library to provide official definitions. Then, we used GPT-4 to pick the best dictionary definition that fit the context and rephrase it to fit grammatically within a sentence. For the translation of non-name and non-idiom terms, we used Google Translate. We compared our approach’s performance with Google Translate as a baseline using BLEU, chrF, and COMET, as well as A/B testing.</abstract>
<identifier type="citekey">liu-etal-2024-cloudsheep</identifier>
<location>
<url>https://aclanthology.org/2024.wmt-1.95</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>960</start>
<end>966</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CloudSheep System for WMT24 Discourse-Level Literary Translation
%A Liu, Lisa
%A Liu, Ryan
%A Tsai, Angela
%A Shang, Jingbo
%Y Haddow, Barry
%Y Kocmi, Tom
%Y Koehn, Philipp
%Y Monz, Christof
%S Proceedings of the Ninth Conference on Machine Translation
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F liu-etal-2024-cloudsheep
%X This paper describes the CloudSheep translation system for WMT24 Discourse-Level Literary Translation shared task. We participated in the Chinese-English direction on the unconstrained track. Our approach to the task used a pipeline of different tools in order to maximize the translation accuracy and flow of the text by combining the strengths of each tool. In particular, our focus was to translate names consistently and idioms correctly. To achieve consistent names throughout a text, a custom name dictionary was generated for each text, containing person and place names, along with their translations. A common honorific dictionary was applied for consistency with titles, especially in historical or cultivation novels. The names were found and translated with GPT 3.5-turbo. To achieve accurate and concise translations of idioms, which are often translated literally and verbosely, we integrated the CC-CEDICT library to provide official definitions. Then, we used GPT-4 to pick the best dictionary definition that fit the context and rephrase it to fit grammatically within a sentence. For the translation of non-name and non-idiom terms, we used Google Translate. We compared our approach’s performance with Google Translate as a baseline using BLEU, chrF, and COMET, as well as A/B testing.
%U https://aclanthology.org/2024.wmt-1.95
%P 960-966
Markdown (Informal)
[CloudSheep System for WMT24 Discourse-Level Literary Translation](https://aclanthology.org/2024.wmt-1.95) (Liu et al., WMT 2024)
ACL