@inproceedings{chen-avgustinova-2021-language,
title = "Are Language-Agnostic Sentence Representations Actually Language-Agnostic?",
author = "Chen, Yu and
Avgustinova, Tania",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)",
month = sep,
year = "2021",
address = "Held Online",
publisher = "INCOMA Ltd.",
url = "https://aclanthology.org/2021.ranlp-1.32",
pages = "274--280",
abstract = "With the emergence of pre-trained multilingual models, multilingual embeddings have been widely applied in various natural language processing tasks. Language-agnostic models provide a versatile way to convert linguistic units from different languages into a shared vector representation space. The relevant work on multilingual sentence embeddings has reportedly reached low error rate in cross-lingual similarity search tasks. In this paper, we apply the pre-trained embedding models and the cross-lingual similarity search task in diverse scenarios, and observed large discrepancy in results in comparison to the original paper. Our findings on cross-lingual similarity search with different newly constructed multilingual datasets show not only correlation with observable language similarities but also strong influence from factors such as translation paths, which limits the interpretation of the language-agnostic property of the LASER model. {\%}",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chen-avgustinova-2021-language">
<titleInfo>
<title>Are Language-Agnostic Sentence Representations Actually Language-Agnostic?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tania</namePart>
<namePart type="family">Avgustinova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Held Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>With the emergence of pre-trained multilingual models, multilingual embeddings have been widely applied in various natural language processing tasks. Language-agnostic models provide a versatile way to convert linguistic units from different languages into a shared vector representation space. The relevant work on multilingual sentence embeddings has reportedly reached low error rate in cross-lingual similarity search tasks. In this paper, we apply the pre-trained embedding models and the cross-lingual similarity search task in diverse scenarios, and observed large discrepancy in results in comparison to the original paper. Our findings on cross-lingual similarity search with different newly constructed multilingual datasets show not only correlation with observable language similarities but also strong influence from factors such as translation paths, which limits the interpretation of the language-agnostic property of the LASER model. %</abstract>
<identifier type="citekey">chen-avgustinova-2021-language</identifier>
<location>
<url>https://aclanthology.org/2021.ranlp-1.32</url>
</location>
<part>
<date>2021-09</date>
<extent unit="page">
<start>274</start>
<end>280</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Are Language-Agnostic Sentence Representations Actually Language-Agnostic?
%A Chen, Yu
%A Avgustinova, Tania
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)
%D 2021
%8 September
%I INCOMA Ltd.
%C Held Online
%F chen-avgustinova-2021-language
%X With the emergence of pre-trained multilingual models, multilingual embeddings have been widely applied in various natural language processing tasks. Language-agnostic models provide a versatile way to convert linguistic units from different languages into a shared vector representation space. The relevant work on multilingual sentence embeddings has reportedly reached low error rate in cross-lingual similarity search tasks. In this paper, we apply the pre-trained embedding models and the cross-lingual similarity search task in diverse scenarios, and observed large discrepancy in results in comparison to the original paper. Our findings on cross-lingual similarity search with different newly constructed multilingual datasets show not only correlation with observable language similarities but also strong influence from factors such as translation paths, which limits the interpretation of the language-agnostic property of the LASER model. %
%U https://aclanthology.org/2021.ranlp-1.32
%P 274-280
Markdown (Informal)
[Are Language-Agnostic Sentence Representations Actually Language-Agnostic?](https://aclanthology.org/2021.ranlp-1.32) (Chen & Avgustinova, RANLP 2021)
ACL