@inproceedings{root-hopkins-2023-williams,
title = "{W}illiams College's Submission for the {C}oco4{MT} 2023 Shared Task",
author = "Root, Alex and
Hopkins, Mark",
booktitle = "Proceedings of the Second Workshop on Corpus Generation and Corpus Augmentation for Machine Translation",
month = sep,
year = "2023",
address = "Macau SAR, China",
publisher = "Asia-Pacific Association for Machine Translation",
url = "https://aclanthology.org/2023.mtsummit-coco4mt.4/",
pages = "28--32",
abstract = "Professional translation is expensive. As a consequence, when developing a translation system in the absence of a pre-existing parallel corpus, it is important to strategically choose sentences to have professionally translated for the training corpus. In our contribution to the Coco4MT 2023 Shared Task, we explore how sentence embeddings can be leveraged to choose an impactful set of sentences to translate. Based on six language pairs of the JHU Bible corpus, we demonstrate that a technique based on SimCSE embeddings outperforms a competitive suite of baselines."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="root-hopkins-2023-williams">
<titleInfo>
<title>Williams College’s Submission for the Coco4MT 2023 Shared Task</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alex</namePart>
<namePart type="family">Root</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Hopkins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Corpus Generation and Corpus Augmentation for Machine Translation</title>
</titleInfo>
<originInfo>
<publisher>Asia-Pacific Association for Machine Translation</publisher>
<place>
<placeTerm type="text">Macau SAR, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Professional translation is expensive. As a consequence, when developing a translation system in the absence of a pre-existing parallel corpus, it is important to strategically choose sentences to have professionally translated for the training corpus. In our contribution to the Coco4MT 2023 Shared Task, we explore how sentence embeddings can be leveraged to choose an impactful set of sentences to translate. Based on six language pairs of the JHU Bible corpus, we demonstrate that a technique based on SimCSE embeddings outperforms a competitive suite of baselines.</abstract>
<identifier type="citekey">root-hopkins-2023-williams</identifier>
<location>
<url>https://aclanthology.org/2023.mtsummit-coco4mt.4/</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>28</start>
<end>32</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Williams College’s Submission for the Coco4MT 2023 Shared Task
%A Root, Alex
%A Hopkins, Mark
%S Proceedings of the Second Workshop on Corpus Generation and Corpus Augmentation for Machine Translation
%D 2023
%8 September
%I Asia-Pacific Association for Machine Translation
%C Macau SAR, China
%F root-hopkins-2023-williams
%X Professional translation is expensive. As a consequence, when developing a translation system in the absence of a pre-existing parallel corpus, it is important to strategically choose sentences to have professionally translated for the training corpus. In our contribution to the Coco4MT 2023 Shared Task, we explore how sentence embeddings can be leveraged to choose an impactful set of sentences to translate. Based on six language pairs of the JHU Bible corpus, we demonstrate that a technique based on SimCSE embeddings outperforms a competitive suite of baselines.
%U https://aclanthology.org/2023.mtsummit-coco4mt.4/
%P 28-32
Markdown (Informal)
[Williams College’s Submission for the Coco4MT 2023 Shared Task](https://aclanthology.org/2023.mtsummit-coco4mt.4/) (Root & Hopkins, MTSummit 2023)
ACL