@inproceedings{bafna-etal-2025-dialup,
    title     = {{D}ial{U}p! Modeling the Language Continuum by Adapting Models to Dialects and Dialects to Models},
    author    = {Bafna, Niyati and
      Chang, Emily and
      Robinson, Nathaniel Romney and
      Mortensen, David R. and
      Murray, Kenton and
      Yarowsky, David and
      Sirin, Hale},
    editor    = {Che, Wanxiang and
      Nabende, Joyce and
      Shutova, Ekaterina and
      Pilehvar, Mohammad Taher},
    booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.acl-long.989/},
    doi       = {10.18653/v1/2025.acl-long.989},
    pages     = {20188--20233},
    isbn      = {979-8-89176-251-0},
    abstract  = {Most of the world{'}s languages and dialects are low-resource, and lack support in mainstream machine translation (MT) models. However, many of them have a closely-related high-resource language (HRL) neighbor, and differ in linguistically regular ways from it. This underscores the importance of model robustness to dialectal variation and cross-lingual generalization to the HRL dialect continuum. We present DialUp, consisting of a training-time technique for adapting a pretrained model to dialectal data (M{--}{\ensuremath{>}}D), and an inference-time intervention adapting dialectal data to the model expertise (D{--}{\ensuremath{>}}M). M{--}{\ensuremath{>}}D induces model robustness to potentially unseen and unknown dialects by exposure to synthetic data exemplifying linguistic mechanisms of dialectal variation, whereas D{--}{\ensuremath{>}}M treats dialectal divergence for known target dialects. These methods show considerable performance gains for several dialects from four language families, and modest gains for two other language families. We also conduct feature and error analyses, which show that language varieties with low baseline MT performance are more likely to benefit from these approaches.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bafna-etal-2025-dialup">
<titleInfo>
<title>DialUp! Modeling the Language Continuum by Adapting Models to Dialects and Dialects to Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Niyati</namePart>
<namePart type="family">Bafna</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emily</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nathaniel</namePart>
<namePart type="given">Romney</namePart>
<namePart type="family">Robinson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="given">R</namePart>
<namePart type="family">Mortensen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kenton</namePart>
<namePart type="family">Murray</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Yarowsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hale</namePart>
<namePart type="family">Sirin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-251-0</identifier>
</relatedItem>
<abstract>Most of the world’s languages and dialects are low-resource, and lack support in mainstream machine translation (MT) models. However, many of them have a closely-related high-resource language (HRL) neighbor, and differ in linguistically regular ways from it. This underscores the importance of model robustness to dialectal variation and cross-lingual generalization to the HRL dialect continuum. We present DialUp, consisting of a training-time technique for adapting a pretrained model to dialectal data (M→D), and an inference-time intervention adapting dialectal data to the model expertise (D→M). M→D induces model robustness to potentially unseen and unknown dialects by exposure to synthetic data exemplifying linguistic mechanisms of dialectal variation, whereas D→M treats dialectal divergence for known target dialects. These methods show considerable performance gains for several dialects from four language families, and modest gains for two other language families. We also conduct feature and error analyses, which show that language varieties with low baseline MT performance are more likely to benefit from these approaches.</abstract>
<identifier type="citekey">bafna-etal-2025-dialup</identifier>
<identifier type="doi">10.18653/v1/2025.acl-long.989</identifier>
<location>
<url>https://aclanthology.org/2025.acl-long.989/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>20188</start>
<end>20233</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DialUp! Modeling the Language Continuum by Adapting Models to Dialects and Dialects to Models
%A Bafna, Niyati
%A Chang, Emily
%A Robinson, Nathaniel Romney
%A Mortensen, David R.
%A Murray, Kenton
%A Yarowsky, David
%A Sirin, Hale
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F bafna-etal-2025-dialup
%X Most of the world’s languages and dialects are low-resource, and lack support in mainstream machine translation (MT) models. However, many of them have a closely-related high-resource language (HRL) neighbor, and differ in linguistically regular ways from it. This underscores the importance of model robustness to dialectal variation and cross-lingual generalization to the HRL dialect continuum. We present DialUp, consisting of a training-time technique for adapting a pretrained model to dialectal data (M→D), and an inference-time intervention adapting dialectal data to the model expertise (D→M). M→D induces model robustness to potentially unseen and unknown dialects by exposure to synthetic data exemplifying linguistic mechanisms of dialectal variation, whereas D→M treats dialectal divergence for known target dialects. These methods show considerable performance gains for several dialects from four language families, and modest gains for two other language families. We also conduct feature and error analyses, which show that language varieties with low baseline MT performance are more likely to benefit from these approaches.
%R 10.18653/v1/2025.acl-long.989
%U https://aclanthology.org/2025.acl-long.989/
%U https://doi.org/10.18653/v1/2025.acl-long.989
%P 20188-20233
Markdown (Informal)
[DialUp! Modeling the Language Continuum by Adapting Models to Dialects and Dialects to Models](https://aclanthology.org/2025.acl-long.989/) (Bafna et al., ACL 2025)
ACL