@inproceedings{akter-etal-2023-evaluation,
title = "On Evaluation of {B}angla Word Analogies",
author = "Akter, Mousumi and
Sarkar, Souvika and
Karmaker Santu, Shubhra Kanti",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.emnlp-main.811",
doi = "10.18653/v1/2023.emnlp-main.811",
pages = "13121--13127",
abstract = "This paper presents a benchmark dataset of Bangla word analogies for evaluating the quality of existing Bangla word embeddings. Despite being the 7th largest spoken language in the world, Bangla is still a low-resource language and popular NLP models often struggle to perform well on Bangla data sets. Therefore, developing a robust evaluation set is crucial for benchmarking and guiding future research on improving Bangla word embeddings, which is currently missing. To address this issue, we introduce a new evaluation set of 16,678 unique word analogies in Bangla as well as a translated and curated version of the original Mikolov dataset (10,594 samples) in Bangla. Our experiments with different state-of-the-art embedding models reveal that current Bangla word embeddings struggle to achieve high accuracy on both data sets, demonstrating a significant gap in multilingual NLP research.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="akter-etal-2023-evaluation">
<titleInfo>
<title>On Evaluation of Bangla Word Analogies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mousumi</namePart>
<namePart type="family">Akter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Souvika</namePart>
<namePart type="family">Sarkar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shubhra</namePart>
<namePart type="given">Kanti</namePart>
<namePart type="family">Karmaker Santu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents a benchmark dataset of Bangla word analogies for evaluating the quality of existing Bangla word embeddings. Despite being the 7th largest spoken language in the world, Bangla is still a low-resource language and popular NLP models often struggle to perform well on Bangla data sets. Therefore, developing a robust evaluation set is crucial for benchmarking and guiding future research on improving Bangla word embeddings, which is currently missing. To address this issue, we introduce a new evaluation set of 16,678 unique word analogies in Bangla as well as a translated and curated version of the original Mikolov dataset (10,594 samples) in Bangla. Our experiments with different state-of-the-art embedding models reveal that current Bangla word embeddings struggle to achieve high accuracy on both data sets, demonstrating a significant gap in multilingual NLP research.</abstract>
<identifier type="citekey">akter-etal-2023-evaluation</identifier>
<identifier type="doi">10.18653/v1/2023.emnlp-main.811</identifier>
<location>
<url>https://aclanthology.org/2023.emnlp-main.811</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>13121</start>
<end>13127</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T On Evaluation of Bangla Word Analogies
%A Akter, Mousumi
%A Sarkar, Souvika
%A Karmaker Santu, Shubhra Kanti
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F akter-etal-2023-evaluation
%X This paper presents a benchmark dataset of Bangla word analogies for evaluating the quality of existing Bangla word embeddings. Despite being the 7th largest spoken language in the world, Bangla is still a low-resource language and popular NLP models often struggle to perform well on Bangla data sets. Therefore, developing a robust evaluation set is crucial for benchmarking and guiding future research on improving Bangla word embeddings, which is currently missing. To address this issue, we introduce a new evaluation set of 16,678 unique word analogies in Bangla as well as a translated and curated version of the original Mikolov dataset (10,594 samples) in Bangla. Our experiments with different state-of-the-art embedding models reveal that current Bangla word embeddings struggle to achieve high accuracy on both data sets, demonstrating a significant gap in multilingual NLP research.
%R 10.18653/v1/2023.emnlp-main.811
%U https://aclanthology.org/2023.emnlp-main.811
%U https://doi.org/10.18653/v1/2023.emnlp-main.811
%P 13121-13127
Markdown (Informal)
[On Evaluation of Bangla Word Analogies](https://aclanthology.org/2023.emnlp-main.811) (Akter et al., EMNLP 2023)
ACL
- Mousumi Akter, Souvika Sarkar, and Shubhra Kanti Karmaker Santu. 2023. On Evaluation of Bangla Word Analogies. In Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, pages 13121–13127, Singapore. Association for Computational Linguistics.