% BibTeX record for the *SEM 2026 paper (Anthology ID 2026.starsem-conference.26).
% The {SEM} braces in booktitle protect the acronym from style-driven recasing.
@inproceedings{nonomura-etal-2026-mitigating,
  title     = {Mitigating Language Bias in Multilingual Sentence Embeddings for Cross-Lingual Similarity Estimation},
  author    = {Nonomura, Kanade and
               Fukushima, Keita and
               Kondo, Risa and
               Kajiwara, Tomoyuki},
  editor    = {Mohammad, Saif M. and
               Ousidhoum, Nedjma},
  booktitle = {Proceedings of the 15th Joint Conference on Lexical and Computational Semantics (*{SEM} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.starsem-conference.26/},
  pages     = {385--394},
  isbn      = {979-8-89176-413-2},
  abstract  = {We disentangle multilingual sentence embeddings into language-dependent and language-agnostic components, leveraging the latter to improve cross-lingual similarity estimation. Previous studies on this approach have trained disentanglers by combining intra-component constraints, which either align or disalign language-dependent embeddings or language-agnostic embeddings, with inter-component constraints across both embeddings. However, when and how these constraints are effective remains unclear. Our experiments on sentence similarity estimation and machine translation quality estimation revealed that while intra-component constraints and the combination of both constraints are effective for encoder-based multilingual sentence embeddings, inter-component constraints are effective for decoder-based ones. Furthermore, our detailed analysis revealed distinct roles: intra-component constraints improve uniformity within the embedding space, while inter-component constraints enhance cross-lingual alignment between parallel sentences.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS v3 record for a single conference paper. -->
  <mods ID="nonomura-etal-2026-mitigating">
    <titleInfo>
      <title>Mitigating Language Bias in Multilingual Sentence Embeddings for Cross-Lingual Similarity Estimation</title>
    </titleInfo>
    <!-- Paper authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Kanade</namePart>
      <namePart type="family">Nonomura</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Keita</namePart>
      <namePart type="family">Fukushima</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Risa</namePart>
      <namePart type="family">Kondo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tomoyuki</namePart>
      <namePart type="family">Kajiwara</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 15th Joint Conference on Lexical and Computational Semantics (*SEM 2026)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Saif</namePart>
        <namePart type="given">M</namePart>
        <namePart type="family">Mohammad</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nedjma</namePart>
        <namePart type="family">Ousidhoum</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-413-2</identifier>
    </relatedItem>
    <abstract>We disentangle multilingual sentence embeddings into language-dependent and language-agnostic components, leveraging the latter to improve cross-lingual similarity estimation. Previous studies on this approach have trained disentanglers by combining intra-component constraints, which either align or disalign language-dependent embeddings or language-agnostic embeddings, with inter-component constraints across both embeddings. However, when and how these constraints are effective remains unclear. Our experiments on sentence similarity estimation and machine translation quality estimation revealed that while intra-component constraints and the combination of both constraints are effective for encoder-based multilingual sentence embeddings, inter-component constraints are effective for decoder-based ones. Furthermore, our detailed analysis revealed distinct roles: intra-component constraints improve uniformity within the embedding space, while inter-component constraints enhance cross-lingual alignment between parallel sentences.</abstract>
    <!-- The citekey is the same string as the mods ID attribute above. -->
    <identifier type="citekey">nonomura-etal-2026-mitigating</identifier>
    <location>
      <url>https://aclanthology.org/2026.starsem-conference.26/</url>
    </location>
    <!-- Position of the paper inside the host volume: issue date and page extent. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>385</start>
        <end>394</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Mitigating Language Bias in Multilingual Sentence Embeddings for Cross-Lingual Similarity Estimation
%A Nonomura, Kanade
%A Fukushima, Keita
%A Kondo, Risa
%A Kajiwara, Tomoyuki
%Y Mohammad, Saif M.
%Y Ousidhoum, Nedjma
%S Proceedings of the 15th Joint Conference on Lexical and Computational Semantics (*SEM 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-413-2
%F nonomura-etal-2026-mitigating
%X We disentangle multilingual sentence embeddings into language-dependent and language-agnostic components, leveraging the latter to improve cross-lingual similarity estimation. Previous studies on this approach have trained disentanglers by combining intra-component constraints, which either align or disalign language-dependent embeddings or language-agnostic embeddings, with inter-component constraints across both embeddings. However, when and how these constraints are effective remains unclear. Our experiments on sentence similarity estimation and machine translation quality estimation revealed that while intra-component constraints and the combination of both constraints are effective for encoder-based multilingual sentence embeddings, inter-component constraints are effective for decoder-based ones. Furthermore, our detailed analysis revealed distinct roles: intra-component constraints improve uniformity within the embedding space, while inter-component constraints enhance cross-lingual alignment between parallel sentences.
%U https://aclanthology.org/2026.starsem-conference.26/
%P 385-394
Markdown (Informal)
[Mitigating Language Bias in Multilingual Sentence Embeddings for Cross-Lingual Similarity Estimation](https://aclanthology.org/2026.starsem-conference.26/) (Nonomura et al., \*SEM 2026)
ACL