@inproceedings{ahmadi-etal-2025-conloan,
title = "{C}on{L}oan: A Contrastive Multilingual Dataset for Evaluating Loanwords",
author = {Ahmadi, Sina and
Hess, Micha David and
{\'A}lvarez-Mellado, Elena and
Battisti, Alessia and
Ding, Cui and
G{\"o}hring, Anne and
Gao, Yingqiang and
Jiang, Zifan and
Michail, Andrianos and
Morad, Peshmerge and
Niklaus, Joel and
Panagiotopoulou, Maria Christina and
Perrella, Stefano and
Opitz, Juri and
Shaitarova, Anastassia and
Sennrich, Rico},
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-long.1453/",
doi = "10.18653/v1/2025.acl-long.1453",
pages = "30070--30090",
ISBN = "979-8-89176-251-0",
abstract = "Lexical borrowing, the adoption of words from one language into another, is a ubiquitous linguistic phenomenon influenced by geopolitical, societal, and technological factors. This paper introduces ConLoan{--}a novel contrastive dataset comprising sentences with and without loanwords across 10 languages. Through systematic evaluation using this dataset, we investigate how state-of-the-art machine translation and language models process loanwords compared to their native alternatives. Our experiments reveal that these systems show systematic preferences for loanwords over native terms and exhibit varying performance across languages. These findings provide valuable insights for developing more linguistically robust NLP systems."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ahmadi-etal-2025-conloan">
<titleInfo>
<title>ConLoan: A Contrastive Multilingual Dataset for Evaluating Loanwords</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sina</namePart>
<namePart type="family">Ahmadi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Micha</namePart>
<namePart type="given">David</namePart>
<namePart type="family">Hess</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elena</namePart>
<namePart type="family">Álvarez-Mellado</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessia</namePart>
<namePart type="family">Battisti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cui</namePart>
<namePart type="family">Ding</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anne</namePart>
<namePart type="family">Göhring</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yingqiang</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zifan</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrianos</namePart>
<namePart type="family">Michail</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peshmerge</namePart>
<namePart type="family">Morad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joel</namePart>
<namePart type="family">Niklaus</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="given">Christina</namePart>
<namePart type="family">Panagiotopoulou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stefano</namePart>
<namePart type="family">Perrella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juri</namePart>
<namePart type="family">Opitz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anastassia</namePart>
<namePart type="family">Shaitarova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rico</namePart>
<namePart type="family">Sennrich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-251-0</identifier>
</relatedItem>
<abstract>Lexical borrowing, the adoption of words from one language into another, is a ubiquitous linguistic phenomenon influenced by geopolitical, societal, and technological factors. This paper introduces ConLoan–a novel contrastive dataset comprising sentences with and without loanwords across 10 languages. Through systematic evaluation using this dataset, we investigate how state-of-the-art machine translation and language models process loanwords compared to their native alternatives. Our experiments reveal that these systems show systematic preferences for loanwords over native terms and exhibit varying performance across languages. These findings provide valuable insights for developing more linguistically robust NLP systems.</abstract>
<identifier type="citekey">ahmadi-etal-2025-conloan</identifier>
<identifier type="doi">10.18653/v1/2025.acl-long.1453</identifier>
<location>
<url>https://aclanthology.org/2025.acl-long.1453/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>30070</start>
<end>30090</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ConLoan: A Contrastive Multilingual Dataset for Evaluating Loanwords
%A Ahmadi, Sina
%A Hess, Micha David
%A Álvarez-Mellado, Elena
%A Battisti, Alessia
%A Ding, Cui
%A Göhring, Anne
%A Gao, Yingqiang
%A Jiang, Zifan
%A Michail, Andrianos
%A Morad, Peshmerge
%A Niklaus, Joel
%A Panagiotopoulou, Maria Christina
%A Perrella, Stefano
%A Opitz, Juri
%A Shaitarova, Anastassia
%A Sennrich, Rico
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F ahmadi-etal-2025-conloan
%X Lexical borrowing, the adoption of words from one language into another, is a ubiquitous linguistic phenomenon influenced by geopolitical, societal, and technological factors. This paper introduces ConLoan–a novel contrastive dataset comprising sentences with and without loanwords across 10 languages. Through systematic evaluation using this dataset, we investigate how state-of-the-art machine translation and language models process loanwords compared to their native alternatives. Our experiments reveal that these systems show systematic preferences for loanwords over native terms and exhibit varying performance across languages. These findings provide valuable insights for developing more linguistically robust NLP systems.
%R 10.18653/v1/2025.acl-long.1453
%U https://aclanthology.org/2025.acl-long.1453/
%U https://doi.org/10.18653/v1/2025.acl-long.1453
%P 30070-30090
Markdown (Informal)
[ConLoan: A Contrastive Multilingual Dataset for Evaluating Loanwords](https://aclanthology.org/2025.acl-long.1453/) (Ahmadi et al., ACL 2025)
ACL
- Sina Ahmadi, Micha David Hess, Elena Álvarez-Mellado, Alessia Battisti, Cui Ding, Anne Göhring, Yingqiang Gao, Zifan Jiang, Andrianos Michail, Peshmerge Morad, Joel Niklaus, Maria Christina Panagiotopoulou, Stefano Perrella, Juri Opitz, Anastassia Shaitarova, and Rico Sennrich. 2025. ConLoan: A Contrastive Multilingual Dataset for Evaluating Loanwords. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 30070–30090, Vienna, Austria. Association for Computational Linguistics.