@inproceedings{montariol-allauzen-2021-measure,
title = "Measure and Evaluation of Semantic Divergence across Two Languages",
author = "Montariol, Syrielle and
Allauzen, Alexandre",
editor = "Zong, Chengqing and
Xia, Fei and
Li, Wenjie and
Navigli, Roberto",
booktitle = "Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)",
month = aug,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.acl-long.100",
doi = "10.18653/v1/2021.acl-long.100",
pages = "1247--1258",
abstract = "Languages are dynamic systems: word usage may change over time, reflecting various societal factors. However, all languages do not evolve identically: the impact of an event, the influence of a trend or thinking, can differ between communities. In this paper, we propose to track these divergences by comparing the evolution of a word and its translation across two languages. We investigate several methods of building time-varying and bilingual word embeddings, using contextualised and non-contextualised embeddings. We propose a set of scenarios to characterize semantic divergence across two languages, along with a setup to differentiate them in a bilingual corpus. We evaluate the different methods by generating a corpus of synthetic semantic change across two languages, English and French, before applying them to newspaper corpora to detect bilingual semantic divergence and provide qualitative insight for the task. We conclude that BERT embeddings coupled with a clustering step lead to the best performance on synthetic corpora; however, the performance of CBOW embeddings is very competitive and more adapted to an exploratory analysis on a large corpus.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="montariol-allauzen-2021-measure">
<titleInfo>
<title>Measure and Evaluation of Semantic Divergence across Two Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Syrielle</namePart>
<namePart type="family">Montariol</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexandre</namePart>
<namePart type="family">Allauzen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chengqing</namePart>
<namePart type="family">Zong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fei</namePart>
<namePart type="family">Xia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wenjie</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roberto</namePart>
<namePart type="family">Navigli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Languages are dynamic systems: word usage may change over time, reflecting various societal factors. However, all languages do not evolve identically: the impact of an event, the influence of a trend or thinking, can differ between communities. In this paper, we propose to track these divergences by comparing the evolution of a word and its translation across two languages. We investigate several methods of building time-varying and bilingual word embeddings, using contextualised and non-contextualised embeddings. We propose a set of scenarios to characterize semantic divergence across two languages, along with a setup to differentiate them in a bilingual corpus. We evaluate the different methods by generating a corpus of synthetic semantic change across two languages, English and French, before applying them to newspaper corpora to detect bilingual semantic divergence and provide qualitative insight for the task. We conclude that BERT embeddings coupled with a clustering step lead to the best performance on synthetic corpora; however, the performance of CBOW embeddings is very competitive and more adapted to an exploratory analysis on a large corpus.</abstract>
<identifier type="citekey">montariol-allauzen-2021-measure</identifier>
<identifier type="doi">10.18653/v1/2021.acl-long.100</identifier>
<location>
<url>https://aclanthology.org/2021.acl-long.100</url>
</location>
<part>
<date>2021-08</date>
<extent unit="page">
<start>1247</start>
<end>1258</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Measure and Evaluation of Semantic Divergence across Two Languages
%A Montariol, Syrielle
%A Allauzen, Alexandre
%Y Zong, Chengqing
%Y Xia, Fei
%Y Li, Wenjie
%Y Navigli, Roberto
%S Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)
%D 2021
%8 August
%I Association for Computational Linguistics
%C Online
%F montariol-allauzen-2021-measure
%X Languages are dynamic systems: word usage may change over time, reflecting various societal factors. However, all languages do not evolve identically: the impact of an event, the influence of a trend or thinking, can differ between communities. In this paper, we propose to track these divergences by comparing the evolution of a word and its translation across two languages. We investigate several methods of building time-varying and bilingual word embeddings, using contextualised and non-contextualised embeddings. We propose a set of scenarios to characterize semantic divergence across two languages, along with a setup to differentiate them in a bilingual corpus. We evaluate the different methods by generating a corpus of synthetic semantic change across two languages, English and French, before applying them to newspaper corpora to detect bilingual semantic divergence and provide qualitative insight for the task. We conclude that BERT embeddings coupled with a clustering step lead to the best performance on synthetic corpora; however, the performance of CBOW embeddings is very competitive and more adapted to an exploratory analysis on a large corpus.
%R 10.18653/v1/2021.acl-long.100
%U https://aclanthology.org/2021.acl-long.100
%U https://doi.org/10.18653/v1/2021.acl-long.100
%P 1247-1258
Markdown (Informal)
[Measure and Evaluation of Semantic Divergence across Two Languages](https://aclanthology.org/2021.acl-long.100) (Montariol & Allauzen, ACL-IJCNLP 2021)
ACL
- Syrielle Montariol and Alexandre Allauzen. 2021. Measure and Evaluation of Semantic Divergence across Two Languages. In Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pages 1247–1258, Online. Association for Computational Linguistics.