@inproceedings{poelman-lhoneux-2025-roles,
title = "The Roles of {English} in Evaluating Multilingual Language Models",
author = "Poelman, Wessel and
de Lhoneux, Miryam",
editor = "Johansson, Richard and
Stymne, Sara",
booktitle = "Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)",
month = mar,
year = "2025",
address = "Tallinn, Estonia",
publisher = "University of Tartu Library",
url = "https://aclanthology.org/2025.nodalida-1.53/",
pages = "492--498",
ISBN = "978-9908-53-109-0",
abstract = "Multilingual natural language processing is getting increased attention, with numerous models, benchmarks, and methods being released for many languages. English is often used in multilingual evaluation to prompt language models (LMs), mainly to overcome the lack of instruction tuning data in other languages. In this position paper, we lay out two roles of English in multilingual LM evaluations: as an interface and as a natural language. We argue that these roles have different goals: task performance versus language understanding. This discrepancy is highlighted with examples from datasets and evaluation setups. Numerous works explicitly use English as an interface to boost task performance. We recommend to move away from these imprecise methods and instead focus on language understanding."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="poelman-lhoneux-2025-roles">
<titleInfo>
<title>The Roles of English in Evaluating Multilingual Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wessel</namePart>
<namePart type="family">Poelman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Miryam</namePart>
<namePart type="family">de Lhoneux</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="family">Johansson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Stymne</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>University of Tartu Library</publisher>
<place>
<placeTerm type="text">Tallinn, Estonia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">978-9908-53-109-0</identifier>
</relatedItem>
<abstract>Multilingual natural language processing is getting increased attention, with numerous models, benchmarks, and methods being released for many languages. English is often used in multilingual evaluation to prompt language models (LMs), mainly to overcome the lack of instruction tuning data in other languages. In this position paper, we lay out two roles of English in multilingual LM evaluations: as an interface and as a natural language. We argue that these roles have different goals: task performance versus language understanding. This discrepancy is highlighted with examples from datasets and evaluation setups. Numerous works explicitly use English as an interface to boost task performance. We recommend to move away from these imprecise methods and instead focus on language understanding.</abstract>
<identifier type="citekey">poelman-lhoneux-2025-roles</identifier>
<location>
<url>https://aclanthology.org/2025.nodalida-1.53/</url>
</location>
<part>
<date>2025-03</date>
<extent unit="page">
<start>492</start>
<end>498</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Roles of English in Evaluating Multilingual Language Models
%A Poelman, Wessel
%A de Lhoneux, Miryam
%Y Johansson, Richard
%Y Stymne, Sara
%S Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)
%D 2025
%8 March
%I University of Tartu Library
%C Tallinn, Estonia
%@ 978-9908-53-109-0
%F poelman-lhoneux-2025-roles
%X Multilingual natural language processing is getting increased attention, with numerous models, benchmarks, and methods being released for many languages. English is often used in multilingual evaluation to prompt language models (LMs), mainly to overcome the lack of instruction tuning data in other languages. In this position paper, we lay out two roles of English in multilingual LM evaluations: as an interface and as a natural language. We argue that these roles have different goals: task performance versus language understanding. This discrepancy is highlighted with examples from datasets and evaluation setups. Numerous works explicitly use English as an interface to boost task performance. We recommend to move away from these imprecise methods and instead focus on language understanding.
%U https://aclanthology.org/2025.nodalida-1.53/
%P 492-498
Markdown (Informal)
[The Roles of English in Evaluating Multilingual Language Models](https://aclanthology.org/2025.nodalida-1.53/) (Poelman & de Lhoneux, NoDaLiDa 2025)
ACL
- Wessel Poelman and Miryam de Lhoneux. 2025. The Roles of English in Evaluating Multilingual Language Models. In Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025), pages 492–498, Tallinn, Estonia. University of Tartu Library.