@inproceedings{mohammed-niculae-2025-context,
title = "Context-Aware or Context-Insensitive? Assessing {LLM}s' Performance in Document-Level Translation",
author = "Mohammed, Wafaa and
Niculae, Vlad",
editor = "Bouillon, Pierrette and
Gerlach, Johanna and
Girletti, Sabrina and
Volkart, Lise and
Rubino, Raphael and
Sennrich, Rico and
Farinha, Ana C. and
Gaido, Marco and
Daems, Joke and
Kenny, Dorothy and
Moniz, Helena and
Szoc, Sara",
booktitle = "Proceedings of Machine Translation Summit XX: Volume 1",
month = jun,
year = "2025",
address = "Geneva, Switzerland",
publisher = "European Association for Machine Translation",
url = "https://aclanthology.org/2025.mtsummit-1.10/",
pages = "126--137",
ISBN = "978-2-9701897-0-1",
abstract = "Large language models (LLMs) are increasingly strong contenders in machine translation. In this work, we focus on document-level translation, where some words cannot be translated without context from outside the sentence. Specifically, we investigate the ability of prominent LLMs to utilize the document context during translation through a perturbation analysis (analyzing models' robustness to perturbed and randomized document context) and an attribution analysis (examining the contribution of relevant context to the translation). We conduct an extensive evaluation across nine LLMs from diverse model families and training paradigms, including translation-specialized LLMs, alongside two encoder-decoder transformer baselines. We find that LLMs' improved document-translation performance compared to encoder-decoder models is not reflected in pronoun translation performance. Our analysis highlight the need for context-aware finetuning of LLMs with a focus on relevant parts of the context to improve their reliability for document-level translation."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mohammed-niculae-2025-context">
<titleInfo>
<title>Context-Aware or Context-Insensitive? Assessing LLMs’ Performance in Document-Level Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wafaa</namePart>
<namePart type="family">Mohammed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vlad</namePart>
<namePart type="family">Niculae</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of Machine Translation Summit XX: Volume 1</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pierrette</namePart>
<namePart type="family">Bouillon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Johanna</namePart>
<namePart type="family">Gerlach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sabrina</namePart>
<namePart type="family">Girletti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lise</namePart>
<namePart type="family">Volkart</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Raphael</namePart>
<namePart type="family">Rubino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rico</namePart>
<namePart type="family">Sennrich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ana</namePart>
<namePart type="given">C</namePart>
<namePart type="family">Farinha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Gaido</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joke</namePart>
<namePart type="family">Daems</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dorothy</namePart>
<namePart type="family">Kenny</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helena</namePart>
<namePart type="family">Moniz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Szoc</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Association for Machine Translation</publisher>
<place>
<placeTerm type="text">Geneva, Switzerland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">978-2-9701897-0-1</identifier>
</relatedItem>
<abstract>Large language models (LLMs) are increasingly strong contenders in machine translation. In this work, we focus on document-level translation, where some words cannot be translated without context from outside the sentence. Specifically, we investigate the ability of prominent LLMs to utilize the document context during translation through a perturbation analysis (analyzing models’ robustness to perturbed and randomized document context) and an attribution analysis (examining the contribution of relevant context to the translation). We conduct an extensive evaluation across nine LLMs from diverse model families and training paradigms, including translation-specialized LLMs, alongside two encoder-decoder transformer baselines. We find that LLMs’ improved document-translation performance compared to encoder-decoder models is not reflected in pronoun translation performance. Our analysis highlight the need for context-aware finetuning of LLMs with a focus on relevant parts of the context to improve their reliability for document-level translation.</abstract>
<identifier type="citekey">mohammed-niculae-2025-context</identifier>
<location>
<url>https://aclanthology.org/2025.mtsummit-1.10/</url>
</location>
<part>
<date>2025-06</date>
<extent unit="page">
<start>126</start>
<end>137</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Context-Aware or Context-Insensitive? Assessing LLMs’ Performance in Document-Level Translation
%A Mohammed, Wafaa
%A Niculae, Vlad
%Y Bouillon, Pierrette
%Y Gerlach, Johanna
%Y Girletti, Sabrina
%Y Volkart, Lise
%Y Rubino, Raphael
%Y Sennrich, Rico
%Y Farinha, Ana C.
%Y Gaido, Marco
%Y Daems, Joke
%Y Kenny, Dorothy
%Y Moniz, Helena
%Y Szoc, Sara
%S Proceedings of Machine Translation Summit XX: Volume 1
%D 2025
%8 June
%I European Association for Machine Translation
%C Geneva, Switzerland
%@ 978-2-9701897-0-1
%F mohammed-niculae-2025-context
%X Large language models (LLMs) are increasingly strong contenders in machine translation. In this work, we focus on document-level translation, where some words cannot be translated without context from outside the sentence. Specifically, we investigate the ability of prominent LLMs to utilize the document context during translation through a perturbation analysis (analyzing models’ robustness to perturbed and randomized document context) and an attribution analysis (examining the contribution of relevant context to the translation). We conduct an extensive evaluation across nine LLMs from diverse model families and training paradigms, including translation-specialized LLMs, alongside two encoder-decoder transformer baselines. We find that LLMs’ improved document-translation performance compared to encoder-decoder models is not reflected in pronoun translation performance. Our analysis highlight the need for context-aware finetuning of LLMs with a focus on relevant parts of the context to improve their reliability for document-level translation.
%U https://aclanthology.org/2025.mtsummit-1.10/
%P 126-137
Markdown (Informal)
[Context-Aware or Context-Insensitive? Assessing LLMs’ Performance in Document-Level Translation](https://aclanthology.org/2025.mtsummit-1.10/) (Mohammed & Niculae, MTSummit 2025)
ACL