@article{mendonca-etal-2023-onception,
title = "Onception: Active Learning with Expert Advice for Real World Machine Translation",
author = "Mendon{\c{c}}a, V{\^a}nia and
Rei, Ricardo and
Coheur, Lu{\'\i}sa and
Sardinha, Alberto",
journal = "Computational Linguistics",
volume = "49",
number = "2",
month = jun,
year = "2023",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2023.cl-2.3",
doi = "10.1162/coli_a_00473",
pages = "325--372",
abstract = "Active learning can play an important role in low-resource settings (i.e., where annotated data is scarce), by selecting which instances may be more worthy to annotate. Most active learning approaches for Machine Translation assume the existence of a pool of sentences in a source language, and rely on human annotators to provide translations or post-edits, which can still be costly. In this article, we apply active learning to a real-world human-in-the-loop scenario in which we assume that: (1) the source sentences may not be readily available, but instead arrive in a stream; (2) the automatic translations receive feedback in the form of a rating, instead of a correct/edited translation, since the human-in-the-loop might be a user looking for a translation, but not be able to provide one. To tackle the challenge of deciding whether each incoming pair source{--}translations is worthy to query for human feedback, we resort to a number of stream-based active learning query strategies. Moreover, because we do not know in advance which query strategy will be the most adequate for a certain language pair and set of Machine Translation models, we propose to dynamically combine multiple strategies using prediction with expert advice. Our experiments on different language pairs and feedback settings show that using active learning allows us to converge on the best Machine Translation systems with fewer human interactions. Furthermore, combining multiple strategies using prediction with expert advice outperforms several individual active learning strategies with even fewer interactions, particularly in partial feedback settings.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mendonca-etal-2023-onception">
<titleInfo>
<title>Onception: Active Learning with Expert Advice for Real World Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vânia</namePart>
<namePart type="family">Mendonça</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ricardo</namePart>
<namePart type="family">Rei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luísa</namePart>
<namePart type="family">Coheur</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alberto</namePart>
<namePart type="family">Sardinha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>Active learning can play an important role in low-resource settings (i.e., where annotated data is scarce), by selecting which instances may be more worthy to annotate. Most active learning approaches for Machine Translation assume the existence of a pool of sentences in a source language, and rely on human annotators to provide translations or post-edits, which can still be costly. In this article, we apply active learning to a real-world human-in-the-loop scenario in which we assume that: (1) the source sentences may not be readily available, but instead arrive in a stream; (2) the automatic translations receive feedback in the form of a rating, instead of a correct/edited translation, since the human-in-the-loop might be a user looking for a translation, but not be able to provide one. To tackle the challenge of deciding whether each incoming pair source–translations is worthy to query for human feedback, we resort to a number of stream-based active learning query strategies. Moreover, because we do not know in advance which query strategy will be the most adequate for a certain language pair and set of Machine Translation models, we propose to dynamically combine multiple strategies using prediction with expert advice. Our experiments on different language pairs and feedback settings show that using active learning allows us to converge on the best Machine Translation systems with fewer human interactions. Furthermore, combining multiple strategies using prediction with expert advice outperforms several individual active learning strategies with even fewer interactions, particularly in partial feedback settings.</abstract>
<identifier type="citekey">mendonca-etal-2023-onception</identifier>
<identifier type="doi">10.1162/coli_a_00473</identifier>
<location>
<url>https://aclanthology.org/2023.cl-2.3</url>
</location>
<part>
<date>2023-06</date>
<detail type="volume"><number>49</number></detail>
<detail type="issue"><number>2</number></detail>
<extent unit="page">
<start>325</start>
<end>372</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Onception: Active Learning with Expert Advice for Real World Machine Translation
%A Mendonça, Vânia
%A Rei, Ricardo
%A Coheur, Luísa
%A Sardinha, Alberto
%J Computational Linguistics
%D 2023
%8 June
%V 49
%N 2
%I MIT Press
%C Cambridge, MA
%F mendonca-etal-2023-onception
%X Active learning can play an important role in low-resource settings (i.e., where annotated data is scarce), by selecting which instances may be more worthy to annotate. Most active learning approaches for Machine Translation assume the existence of a pool of sentences in a source language, and rely on human annotators to provide translations or post-edits, which can still be costly. In this article, we apply active learning to a real-world human-in-the-loop scenario in which we assume that: (1) the source sentences may not be readily available, but instead arrive in a stream; (2) the automatic translations receive feedback in the form of a rating, instead of a correct/edited translation, since the human-in-the-loop might be a user looking for a translation, but not be able to provide one. To tackle the challenge of deciding whether each incoming pair source–translations is worthy to query for human feedback, we resort to a number of stream-based active learning query strategies. Moreover, because we do not know in advance which query strategy will be the most adequate for a certain language pair and set of Machine Translation models, we propose to dynamically combine multiple strategies using prediction with expert advice. Our experiments on different language pairs and feedback settings show that using active learning allows us to converge on the best Machine Translation systems with fewer human interactions. Furthermore, combining multiple strategies using prediction with expert advice outperforms several individual active learning strategies with even fewer interactions, particularly in partial feedback settings.
%R 10.1162/coli_a_00473
%U https://aclanthology.org/2023.cl-2.3
%U https://doi.org/10.1162/coli_a_00473
%P 325-372
Markdown (Informal)
[Onception: Active Learning with Expert Advice for Real World Machine Translation](https://aclanthology.org/2023.cl-2.3) (Mendonça et al., CL 2023)
ACL