@inproceedings{jung-etal-2024-mitigating,
title = "Mitigating the Linguistic Gap with Phonemic Representations for Robust Cross-lingual Transfer",
author = "Jung, Haeji and
Oh, Changdae and
Kang, Jooeon and
Sohn, Jimin and
Song, Kyungwoo and
Kim, Jinkyu and
Mortensen, David R",
editor = {S{\"a}lev{\"a}, Jonne and
Owodunni, Abraham},
booktitle = "Proceedings of the Fourth Workshop on Multilingual Representation Learning (MRL 2024)",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.mrl-1.16",
pages = "200--211",
abstract = "Approaches to improving multilingual language understanding often struggle with significant performance gaps between high-resource and low-resource languages. While there are efforts to align the languages in a single latent space to mitigate such gaps, how different input-level representations influence such gaps has not been investigated, particularly with phonemic inputs. We hypothesize that the performance gaps are affected by representation discrepancies between those languages, and revisit the use of phonemic representations as a means to mitigate these discrepancies.To demonstrate the effectiveness of phonemic representations, we present experiments on three representative cross-lingual tasks on 12 languages in total. The results show that phonemic representations exhibit higher similarities between languages compared to orthographic representations, and it consistently outperforms grapheme-based baseline model on languages that are relatively low-resourced.We present quantitative evidence from three cross-lingual tasks that demonstrate the effectiveness of phonemic representations, and it is further justified by a theoretical analysis of the cross-lingual performance gap.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jung-etal-2024-mitigating">
<titleInfo>
<title>Mitigating the Linguistic Gap with Phonemic Representations for Robust Cross-lingual Transfer</title>
</titleInfo>
<name type="personal">
<namePart type="given">Haeji</namePart>
<namePart type="family">Jung</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Changdae</namePart>
<namePart type="family">Oh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jooeon</namePart>
<namePart type="family">Kang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jimin</namePart>
<namePart type="family">Sohn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kyungwoo</namePart>
<namePart type="family">Song</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jinkyu</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="given">R</namePart>
<namePart type="family">Mortensen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on Multilingual Representation Learning (MRL 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jonne</namePart>
<namePart type="family">Sälevä</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abraham</namePart>
<namePart type="family">Owodunni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Approaches to improving multilingual language understanding often struggle with significant performance gaps between high-resource and low-resource languages. While there are efforts to align the languages in a single latent space to mitigate such gaps, how different input-level representations influence such gaps has not been investigated, particularly with phonemic inputs. We hypothesize that the performance gaps are affected by representation discrepancies between those languages, and revisit the use of phonemic representations as a means to mitigate these discrepancies.To demonstrate the effectiveness of phonemic representations, we present experiments on three representative cross-lingual tasks on 12 languages in total. The results show that phonemic representations exhibit higher similarities between languages compared to orthographic representations, and it consistently outperforms grapheme-based baseline model on languages that are relatively low-resourced.We present quantitative evidence from three cross-lingual tasks that demonstrate the effectiveness of phonemic representations, and it is further justified by a theoretical analysis of the cross-lingual performance gap.</abstract>
<identifier type="citekey">jung-etal-2024-mitigating</identifier>
<location>
<url>https://aclanthology.org/2024.mrl-1.16</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>200</start>
<end>211</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Mitigating the Linguistic Gap with Phonemic Representations for Robust Cross-lingual Transfer
%A Jung, Haeji
%A Oh, Changdae
%A Kang, Jooeon
%A Sohn, Jimin
%A Song, Kyungwoo
%A Kim, Jinkyu
%A Mortensen, David R.
%Y Sälevä, Jonne
%Y Owodunni, Abraham
%S Proceedings of the Fourth Workshop on Multilingual Representation Learning (MRL 2024)
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F jung-etal-2024-mitigating
%X Approaches to improving multilingual language understanding often struggle with significant performance gaps between high-resource and low-resource languages. While there are efforts to align the languages in a single latent space to mitigate such gaps, how different input-level representations influence such gaps has not been investigated, particularly with phonemic inputs. We hypothesize that the performance gaps are affected by representation discrepancies between those languages, and revisit the use of phonemic representations as a means to mitigate these discrepancies.To demonstrate the effectiveness of phonemic representations, we present experiments on three representative cross-lingual tasks on 12 languages in total. The results show that phonemic representations exhibit higher similarities between languages compared to orthographic representations, and it consistently outperforms grapheme-based baseline model on languages that are relatively low-resourced.We present quantitative evidence from three cross-lingual tasks that demonstrate the effectiveness of phonemic representations, and it is further justified by a theoretical analysis of the cross-lingual performance gap.
%U https://aclanthology.org/2024.mrl-1.16
%P 200-211
Markdown (Informal)
[Mitigating the Linguistic Gap with Phonemic Representations for Robust Cross-lingual Transfer](https://aclanthology.org/2024.mrl-1.16) (Jung et al., MRL 2024)
ACL
- Haeji Jung, Changdae Oh, Jooeon Kang, Jimin Sohn, Kyungwoo Song, Jinkyu Kim, and David R Mortensen. 2024. Mitigating the Linguistic Gap with Phonemic Representations for Robust Cross-lingual Transfer. In Proceedings of the Fourth Workshop on Multilingual Representation Learning (MRL 2024), pages 200–211, Miami, Florida, USA. Association for Computational Linguistics.