@inproceedings{liu-niehues-2024-transferable,
title = "How Transferable are Attribute Controllers on Pretrained Multilingual Translation Models?",
author = "Liu, Danni and
Niehues, Jan",
editor = "Graham, Yvette and
Purver, Matthew",
booktitle = "Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = mar,
year = "2024",
address = "St. Julian{'}s, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.eacl-long.20",
pages = "334--348",
abstract = "Customizing machine translation models to comply with desired attributes (e.g., formality or grammatical gender) is a well-studied topic. However, most current approaches rely on (semi-)supervised data with attribute annotations. This data scarcity bottlenecks democratizing such customization possibilities to a wider range of languages, particularly lower-resource ones. This gap is out of sync with recent progress in pretrained massively multilingual translation models. In response, we transfer the attribute controlling capabilities to languages without attribute-annotated data with an NLLB-200 model as a foundation. Inspired by techniques from controllable generation, we employ a gradient-based inference-time controller to steer the pretrained model. The controller transfers well to zero-shot conditions, as it operates on pretrained multilingual representations and is attribute- rather than language-specific. With a comprehensive comparison to finetuning-based control, we demonstrate that, despite finetuning{'}s clear dominance in supervised settings, the gap to inference-time control closes when moving to zero-shot conditions, especially with new and distant target languages. The latter also shows stronger domain robustness. We further show that our inference-time control complements finetuning. Moreover, a human evaluation on a real low-resource language, Bengali, confirms our findings. Our code is in the supplementary material.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="liu-niehues-2024-transferable">
<titleInfo>
<title>How Transferable are Attribute Controllers on Pretrained Multilingual Translation Models?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Danni</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Niehues</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yvette</namePart>
<namePart type="family">Graham</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthew</namePart>
<namePart type="family">Purver</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julian’s, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Customizing machine translation models to comply with desired attributes (e.g., formality or grammatical gender) is a well-studied topic. However, most current approaches rely on (semi-)supervised data with attribute annotations. This data scarcity bottlenecks democratizing such customization possibilities to a wider range of languages, particularly lower-resource ones. This gap is out of sync with recent progress in pretrained massively multilingual translation models. In response, we transfer the attribute controlling capabilities to languages without attribute-annotated data with an NLLB-200 model as a foundation. Inspired by techniques from controllable generation, we employ a gradient-based inference-time controller to steer the pretrained model. The controller transfers well to zero-shot conditions, as it operates on pretrained multilingual representations and is attribute- rather than language-specific. With a comprehensive comparison to finetuning-based control, we demonstrate that, despite finetuning’s clear dominance in supervised settings, the gap to inference-time control closes when moving to zero-shot conditions, especially with new and distant target languages. The latter also shows stronger domain robustness. We further show that our inference-time control complements finetuning. Moreover, a human evaluation on a real low-resource language, Bengali, confirms our findings. Our code is in the supplementary material.</abstract>
<identifier type="citekey">liu-niehues-2024-transferable</identifier>
<location>
<url>https://aclanthology.org/2024.eacl-long.20</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>334</start>
<end>348</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T How Transferable are Attribute Controllers on Pretrained Multilingual Translation Models?
%A Liu, Danni
%A Niehues, Jan
%Y Graham, Yvette
%Y Purver, Matthew
%S Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julian’s, Malta
%F liu-niehues-2024-transferable
%X Customizing machine translation models to comply with desired attributes (e.g., formality or grammatical gender) is a well-studied topic. However, most current approaches rely on (semi-)supervised data with attribute annotations. This data scarcity bottlenecks democratizing such customization possibilities to a wider range of languages, particularly lower-resource ones. This gap is out of sync with recent progress in pretrained massively multilingual translation models. In response, we transfer the attribute controlling capabilities to languages without attribute-annotated data with an NLLB-200 model as a foundation. Inspired by techniques from controllable generation, we employ a gradient-based inference-time controller to steer the pretrained model. The controller transfers well to zero-shot conditions, as it operates on pretrained multilingual representations and is attribute- rather than language-specific. With a comprehensive comparison to finetuning-based control, we demonstrate that, despite finetuning’s clear dominance in supervised settings, the gap to inference-time control closes when moving to zero-shot conditions, especially with new and distant target languages. The latter also shows stronger domain robustness. We further show that our inference-time control complements finetuning. Moreover, a human evaluation on a real low-resource language, Bengali, confirms our findings. Our code is in the supplementary material.
%U https://aclanthology.org/2024.eacl-long.20
%P 334-348
Markdown (Informal)
[How Transferable are Attribute Controllers on Pretrained Multilingual Translation Models?](https://aclanthology.org/2024.eacl-long.20) (Liu & Niehues, EACL 2024)
ACL