@inproceedings{burda-lassen-2023-machine,
title = "Machine Translation of Folktales: small-data-driven and {LLM}-based approaches",
author = "Burda-Lassen, Olena",
editor = "Breitholtz, Ellen and
Lappin, Shalom and
Loaiciga, Sharid and
Ilinykh, Nikolai and
Dobnik, Simon",
booktitle = "Proceedings of the 2023 CLASP Conference on Learning with Small Data (LSD)",
month = sep,
year = "2023",
address = "Gothenburg, Sweden",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.clasp-1.8",
pages = "68--71",
abstract = "Can Large Language Models translate texts with rich cultural elements? How {``}cultured{''} are they? This paper provides an overview of an experiment in Machine Translation of Ukrainian folktales using Large Language Models (Open AI), Google Cloud Translation API, and Opus MT. After benchmarking their performance, we have fine-tuned an Opus MT model on a domain-specific small dataset specially created to translate folktales from Ukrainian to English. We have also tested various prompt engineering techniques on the new Open AI models to generate translations of our test dataset (folktale {`}The Mitten{'}) and have observed promising results. This research explores the importance of both small data and Large Language Models in Machine Learning, specifically in Machine Translation of literary texts, on the example of Ukrainian folktales.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="burda-lassen-2023-machine">
<titleInfo>
<title>Machine Translation of Folktales: small-data-driven and LLM-based approaches</title>
</titleInfo>
<name type="personal">
<namePart type="given">Olena</namePart>
<namePart type="family">Burda-Lassen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2023 CLASP Conference on Learning with Small Data (LSD)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ellen</namePart>
<namePart type="family">Breitholtz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shalom</namePart>
<namePart type="family">Lappin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sharid</namePart>
<namePart type="family">Loaiciga</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikolai</namePart>
<namePart type="family">Ilinykh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Dobnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gothenburg, Sweden</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Can Large Language Models translate texts with rich cultural elements? How “cultured” are they? This paper provides an overview of an experiment in Machine Translation of Ukrainian folktales using Large Language Models (Open AI), Google Cloud Translation API, and Opus MT. After benchmarking their performance, we have fine-tuned an Opus MT model on a domain-specific small dataset specially created to translate folktales from Ukrainian to English. We have also tested various prompt engineering techniques on the new Open AI models to generate translations of our test dataset (folktale ‘The Mitten’) and have observed promising results. This research explores the importance of both small data and Large Language Models in Machine Learning, specifically in Machine Translation of literary texts, on the example of Ukrainian folktales.</abstract>
<identifier type="citekey">burda-lassen-2023-machine</identifier>
<location>
<url>https://aclanthology.org/2023.clasp-1.8</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>68</start>
<end>71</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Machine Translation of Folktales: small-data-driven and LLM-based approaches
%A Burda-Lassen, Olena
%Y Breitholtz, Ellen
%Y Lappin, Shalom
%Y Loaiciga, Sharid
%Y Ilinykh, Nikolai
%Y Dobnik, Simon
%S Proceedings of the 2023 CLASP Conference on Learning with Small Data (LSD)
%D 2023
%8 September
%I Association for Computational Linguistics
%C Gothenburg, Sweden
%F burda-lassen-2023-machine
%X Can Large Language Models translate texts with rich cultural elements? How “cultured” are they? This paper provides an overview of an experiment in Machine Translation of Ukrainian folktales using Large Language Models (Open AI), Google Cloud Translation API, and Opus MT. After benchmarking their performance, we have fine-tuned an Opus MT model on a domain-specific small dataset specially created to translate folktales from Ukrainian to English. We have also tested various prompt engineering techniques on the new Open AI models to generate translations of our test dataset (folktale ‘The Mitten’) and have observed promising results. This research explores the importance of both small data and Large Language Models in Machine Learning, specifically in Machine Translation of literary texts, on the example of Ukrainian folktales.
%U https://aclanthology.org/2023.clasp-1.8
%P 68-71
Markdown (Informal)
[Machine Translation of Folktales: small-data-driven and LLM-based approaches](https://aclanthology.org/2023.clasp-1.8) (Burda-Lassen, CLASP 2023)
ACL