% ACL Anthology BibTeX record. Possessives in the abstract use the ASCII apostrophe ('): a backtick would be typeset by LaTeX as an opening quote.
@inproceedings{lag-etal-2025-mapping,
    title = "Mapping {Faroese} in the Multilingual Representation Space: {Insights} for {ASR} Model Optimization",
    author = "L{\'a}g, D{\'a}vid {\'i} and
      Scalvini, Barbara and
      Gudnason, Jon",
    editor = "Johansson, Richard and
      Stymne, Sara",
    booktitle = "Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)",
    month = mar,
    year = "2025",
    address = "Tallinn, Estonia",
    publisher = "University of Tartu Library",
    url = "https://aclanthology.org/2025.nodalida-1.38/",
    pages = "354--358",
    ISBN = "978-9908-53-109-0",
    abstract = "ASR development for low-resource languages like Faroese faces significant challenges due to the scarcity of large, diverse datasets. While fine-tuning multilingual models using related languages is a common practice, there is no standardized method for selecting these auxiliary languages, leading to a computationally expensive trial-and-error process. By analyzing Faroese's positioning among other languages in wav2vec2's multilingual representation space, we find that Faroese's closest neighbors are influenced not only by linguistic similarity but also by historical, phonetic, and cultural factors. These findings open new avenues for auxiliary language selection to improve Faroese ASR and underscore the potential value of data-driven factors in ASR fine-tuning."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lag-etal-2025-mapping">
<titleInfo>
<title>Mapping Faroese in the Multilingual Representation Space: Insights for ASR Model Optimization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dávid</namePart>
<namePart type="given">í</namePart>
<namePart type="family">Lág</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="family">Scalvini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jon</namePart>
<namePart type="family">Gudnason</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="family">Johansson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Stymne</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>University of Tartu Library</publisher>
<place>
<placeTerm type="text">Tallinn, Estonia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">978-9908-53-109-0</identifier>
</relatedItem>
<abstract>ASR development for low-resource languages like Faroese faces significant challenges due to the scarcity of large, diverse datasets. While fine-tuning multilingual models using related languages is a common practice, there is no standardized method for selecting these auxiliary languages, leading to a computationally expensive trial-and-error process. By analyzing Faroese's positioning among other languages in wav2vec2's multilingual representation space, we find that Faroese's closest neighbors are influenced not only by linguistic similarity but also by historical, phonetic, and cultural factors. These findings open new avenues for auxiliary language selection to improve Faroese ASR and underscore the potential value of data-driven factors in ASR fine-tuning.</abstract>
<identifier type="citekey">lag-etal-2025-mapping</identifier>
<location>
<url>https://aclanthology.org/2025.nodalida-1.38/</url>
</location>
<part>
<date>2025-03</date>
<extent unit="page">
<start>354</start>
<end>358</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Mapping Faroese in the Multilingual Representation Space: Insights for ASR Model Optimization
%A Lág, Dávid í
%A Scalvini, Barbara
%A Gudnason, Jon
%Y Johansson, Richard
%Y Stymne, Sara
%S Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)
%D 2025
%8 March
%I University of Tartu Library
%C Tallinn, Estonia
%@ 978-9908-53-109-0
%F lag-etal-2025-mapping
%X ASR development for low-resource languages like Faroese faces significant challenges due to the scarcity of large, diverse datasets. While fine-tuning multilingual models using related languages is a common practice, there is no standardized method for selecting these auxiliary languages, leading to a computationally expensive trial-and-error process. By analyzing Faroese's positioning among other languages in wav2vec2's multilingual representation space, we find that Faroese's closest neighbors are influenced not only by linguistic similarity but also by historical, phonetic, and cultural factors. These findings open new avenues for auxiliary language selection to improve Faroese ASR and underscore the potential value of data-driven factors in ASR fine-tuning.
%U https://aclanthology.org/2025.nodalida-1.38/
%P 354-358
Markdown (Informal)
[Mapping Faroese in the Multilingual Representation Space: Insights for ASR Model Optimization](https://aclanthology.org/2025.nodalida-1.38/) (Lág et al., NoDaLiDa 2025)
ACL