@inproceedings{ng-etal-2026-modality,
title = "Modality Matching Matters: Calibrating Language Distances for Cross-Lingual Transfer in {URIEL}+",
author = {Ng, York Hay and
Khan, Aditya and
Lu, Xiang and
Salloum, Matteo and
Zhou, Michael and
Hoang, Phuong Hanh and
Do{\u{g}}ru{\"o}z, A. Seza and
Lee, En-Shiun Annie},
editor = "Baez Santamaria, Selene and
Somayajula, Sai Ashish and
Yamaguchi, Atsuki",
booktitle = "Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 4: Student Research Workshop)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.eacl-srw.8/",
pages = "110--130",
ISBN = "979-8-89176-383-8",
abstract = "Existing linguistic knowledge bases such as URIEL+ provide valuable geographic, genetic and typological distances for cross-lingual transfer but suffer from two key limitations. First, their one-size-fits-all vector representations are ill-suited to the diverse structures of linguistic data. Second, they lack a principled method for aggregating these signals into a single, comprehensive score. In this paper, we address these gaps by introducing a framework for type-matched language distances. We propose novel, structure-aware representations for each distance type: speaker-weighted distributions for geography, hyperbolic embeddings for genealogy, and a latent variables model for typology. We unify these signals into a robust, task-agnostic composite distance. Across multiple zero-shot transfer benchmarks, we demonstrate that our representations significantly improve transfer performance when the distance type is relevant to the task, while our composite distance yields gains in most tasks."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ng-etal-2026-modality">
<titleInfo>
<title>Modality Matching Matters: Calibrating Language Distances for Cross-Lingual Transfer in URIEL+</title>
</titleInfo>
<name type="personal">
<namePart type="given">York</namePart>
<namePart type="given">Hay</namePart>
<namePart type="family">Ng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aditya</namePart>
<namePart type="family">Khan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiang</namePart>
<namePart type="family">Lu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matteo</namePart>
<namePart type="family">Salloum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Phuong</namePart>
<namePart type="given">Hanh</namePart>
<namePart type="family">Hoang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">En-Shiun</namePart>
<namePart type="given">Annie</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 4: Student Research Workshop)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Selene</namePart>
<namePart type="family">Baez Santamaria</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sai</namePart>
<namePart type="given">Ashish</namePart>
<namePart type="family">Somayajula</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Atsuki</namePart>
<namePart type="family">Yamaguchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-383-8</identifier>
</relatedItem>
<abstract>Existing linguistic knowledge bases such as URIEL+ provide valuable geographic, genetic and typological distances for cross-lingual transfer but suffer from two key limitations. First, their one-size-fits-all vector representations are ill-suited to the diverse structures of linguistic data. Second, they lack a principled method for aggregating these signals into a single, comprehensive score. In this paper, we address these gaps by introducing a framework for type-matched language distances. We propose novel, structure-aware representations for each distance type: speaker-weighted distributions for geography, hyperbolic embeddings for genealogy, and a latent variables model for typology. We unify these signals into a robust, task-agnostic composite distance. Across multiple zero-shot transfer benchmarks, we demonstrate that our representations significantly improve transfer performance when the distance type is relevant to the task, while our composite distance yields gains in most tasks.</abstract>
<identifier type="citekey">ng-etal-2026-modality</identifier>
<location>
<url>https://aclanthology.org/2026.eacl-srw.8/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>110</start>
<end>130</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Modality Matching Matters: Calibrating Language Distances for Cross-Lingual Transfer in URIEL+
%A Ng, York Hay
%A Khan, Aditya
%A Lu, Xiang
%A Salloum, Matteo
%A Zhou, Michael
%A Hoang, Phuong Hanh
%A Doğruöz, A. Seza
%A Lee, En-Shiun Annie
%Y Baez Santamaria, Selene
%Y Somayajula, Sai Ashish
%Y Yamaguchi, Atsuki
%S Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 4: Student Research Workshop)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-383-8
%F ng-etal-2026-modality
%X Existing linguistic knowledge bases such as URIEL+ provide valuable geographic, genetic and typological distances for cross-lingual transfer but suffer from two key limitations. First, their one-size-fits-all vector representations are ill-suited to the diverse structures of linguistic data. Second, they lack a principled method for aggregating these signals into a single, comprehensive score. In this paper, we address these gaps by introducing a framework for type-matched language distances. We propose novel, structure-aware representations for each distance type: speaker-weighted distributions for geography, hyperbolic embeddings for genealogy, and a latent variables model for typology. We unify these signals into a robust, task-agnostic composite distance. Across multiple zero-shot transfer benchmarks, we demonstrate that our representations significantly improve transfer performance when the distance type is relevant to the task, while our composite distance yields gains in most tasks.
%U https://aclanthology.org/2026.eacl-srw.8/
%P 110-130
Markdown (Informal)
[Modality Matching Matters: Calibrating Language Distances for Cross-Lingual Transfer in URIEL+](https://aclanthology.org/2026.eacl-srw.8/) (Ng et al., EACL 2026)
ACL
- York Hay Ng, Aditya Khan, Xiang Lu, Matteo Salloum, Michael Zhou, Phuong Hanh Hoang, A. Seza Doğruöz, and En-Shiun Annie Lee. 2026. Modality Matching Matters: Calibrating Language Distances for Cross-Lingual Transfer in URIEL+. In Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 4: Student Research Workshop), pages 110–130, Rabat, Morocco. Association for Computational Linguistics.