@inproceedings{dankers-etal-2022-transformer,
title = "Can Transformer be Too Compositional? Analysing Idiom Processing in Neural Machine Translation",
author = "Dankers, Verna and
Lucas, Christopher and
Titov, Ivan",
editor = "Muresan, Smaranda and
Nakov, Preslav and
Villavicencio, Aline",
booktitle = "Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.acl-long.252",
doi = "10.18653/v1/2022.acl-long.252",
pages = "3608--3626",
abstract = "Unlike literal expressions, idioms{'} meanings do not directly follow from their parts, posing a challenge for neural machine translation (NMT). NMT models are often unable to translate idioms accurately and over-generate compositional, literal translations. In this work, we investigate whether the non-compositionality of idioms is reflected in the mechanics of the dominant NMT model, Transformer, by analysing the hidden states and attention patterns for models with English as source language and one of seven European languages as target language. When Transformer emits a non-literal translation - i.e. identifies the expression as idiomatic - the encoder processes idioms more strongly as single lexical units compared to literal expressions. This manifests in idioms{'} parts being grouped through attention and in reduced interaction between idioms and their context. In the decoder{'}s cross-attention, figurative inputs result in reduced attention on source-side tokens. These results suggest that Transformer{'}s tendency to process idioms as compositional expressions contributes to literal translations of idioms.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dankers-etal-2022-transformer">
<titleInfo>
<title>Can Transformer be Too Compositional? Analysing Idiom Processing in Neural Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Verna</namePart>
<namePart type="family">Dankers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Lucas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="family">Titov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Smaranda</namePart>
<namePart type="family">Muresan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aline</namePart>
<namePart type="family">Villavicencio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Unlike literal expressions, idioms’ meanings do not directly follow from their parts, posing a challenge for neural machine translation (NMT). NMT models are often unable to translate idioms accurately and over-generate compositional, literal translations. In this work, we investigate whether the non-compositionality of idioms is reflected in the mechanics of the dominant NMT model, Transformer, by analysing the hidden states and attention patterns for models with English as source language and one of seven European languages as target language. When Transformer emits a non-literal translation - i.e. identifies the expression as idiomatic - the encoder processes idioms more strongly as single lexical units compared to literal expressions. This manifests in idioms’ parts being grouped through attention and in reduced interaction between idioms and their context. In the decoder’s cross-attention, figurative inputs result in reduced attention on source-side tokens. These results suggest that Transformer’s tendency to process idioms as compositional expressions contributes to literal translations of idioms.</abstract>
<identifier type="citekey">dankers-etal-2022-transformer</identifier>
<identifier type="doi">10.18653/v1/2022.acl-long.252</identifier>
<location>
<url>https://aclanthology.org/2022.acl-long.252</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>3608</start>
<end>3626</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Can Transformer be Too Compositional? Analysing Idiom Processing in Neural Machine Translation
%A Dankers, Verna
%A Lucas, Christopher
%A Titov, Ivan
%Y Muresan, Smaranda
%Y Nakov, Preslav
%Y Villavicencio, Aline
%S Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F dankers-etal-2022-transformer
%X Unlike literal expressions, idioms’ meanings do not directly follow from their parts, posing a challenge for neural machine translation (NMT). NMT models are often unable to translate idioms accurately and over-generate compositional, literal translations. In this work, we investigate whether the non-compositionality of idioms is reflected in the mechanics of the dominant NMT model, Transformer, by analysing the hidden states and attention patterns for models with English as source language and one of seven European languages as target language. When Transformer emits a non-literal translation - i.e. identifies the expression as idiomatic - the encoder processes idioms more strongly as single lexical units compared to literal expressions. This manifests in idioms’ parts being grouped through attention and in reduced interaction between idioms and their context. In the decoder’s cross-attention, figurative inputs result in reduced attention on source-side tokens. These results suggest that Transformer’s tendency to process idioms as compositional expressions contributes to literal translations of idioms.
%R 10.18653/v1/2022.acl-long.252
%U https://aclanthology.org/2022.acl-long.252
%U https://doi.org/10.18653/v1/2022.acl-long.252
%P 3608-3626
Markdown (Informal)
[Can Transformer be Too Compositional? Analysing Idiom Processing in Neural Machine Translation](https://aclanthology.org/2022.acl-long.252) (Dankers et al., ACL 2022)
ACL