@inproceedings{ali-gatla-2023-bhojpuri,
    title = "{B}hojpuri {W}ord{N}et: Problems in Translating {H}indi Synsets into {B}hojpuri",
    author = "Ali, Imran and
      Gatla, Praveen",
    editor = "Mitkov, Ruslan and
      Angelova, Galia",
    booktitle = "Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing",
    month = sep,
    year = "2023",
    address = "Varna, Bulgaria",
    publisher = "INCOMA Ltd., Shoumen, Bulgaria",
    url = "https://aclanthology.org/2023.ranlp-1.7",
    pages = "60--68",
    abstract = "Today, artificial intelligence systems are incredibly intelligent, however they lack the human like capacity for understanding. In this context, sense-based lexical resources become a requirement for artificially intelligent machines. Lexical resources like Wordnets have received scholarly attention because they are considered as the crucial sense-based resources in the field of natural language understanding. They can help in knowing the intended meaning of the communicated texts, as they are focused on the concept rather than the words. Wordnets are available only for 18 Indian languages. Keeping this in mind, we have initiated the development of a comprehensive wordnet for Bhojpuri. The present paper describes the creation of the synsets of Bhojpuri and discusses the problems that we faced while translating Hindi synsets into Bhojpuri. They are lexical anomalies, lexical mismatch words, synthesized forms, lack of technical words etc. Nearly 4000 Hindi synsets were mapped for their equivalent synsets in Bhojpuri following the expansion approach. We have also worked on the language-specific synsets, which are unique to Bhojpuri. This resource is useful in machine translation, sentiment analysis, word sense disambiguation, cross-lingual references among Indian languages, and Bhojpuri language teaching and learning.",
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS collection holding a single bibliographic record for one conference paper. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="ali-gatla-2023-bhojpuri">
    <!-- Title of the paper itself. -->
    <titleInfo>
      <title>Bhojpuri WordNet: Problems in Translating Hindi Synsets into Bhojpuri</title>
    </titleInfo>
    <!-- Authors, listed in document order. -->
    <name type="personal">
      <namePart type="given">Imran</namePart>
      <namePart type="family">Ali</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Praveen</namePart>
      <namePart type="family">Gatla</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <!-- Issue date of the paper, at year-month precision. -->
    <originInfo>
      <dateIssued>2023-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Galia</namePart>
        <namePart type="family">Angelova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <!-- Abstract text is kept on one line so that no layout whitespace becomes part of the value. -->
    <abstract>Today, artificial intelligence systems are incredibly intelligent, however they lack the human like capacity for understanding. In this context, sense-based lexical resources become a requirement for artificially intelligent machines. Lexical resources like Wordnets have received scholarly attention because they are considered as the crucial sense-based resources in the field of natural language understanding. They can help in knowing the intended meaning of the communicated texts, as they are focused on the concept rather than the words. Wordnets are available only for 18 Indian languages. Keeping this in mind, we have initiated the development of a comprehensive wordnet for Bhojpuri. The present paper describes the creation of the synsets of Bhojpuri and discusses the problems that we faced while translating Hindi synsets into Bhojpuri. They are lexical anomalies, lexical mismatch words, synthesized forms, lack of technical words etc. Nearly 4000 Hindi synsets were mapped for their equivalent synsets in Bhojpuri following the expansion approach. We have also worked on the language-specific synsets, which are unique to Bhojpuri. This resource is useful in machine translation, sentiment analysis, word sense disambiguation, cross-lingual references among Indian languages, and Bhojpuri language teaching and learning.</abstract>
    <!-- Citation key; same value as the ID attribute on the enclosing mods element. -->
    <identifier type="citekey">ali-gatla-2023-bhojpuri</identifier>
    <location>
      <url>https://aclanthology.org/2023.ranlp-1.7</url>
    </location>
    <!-- Position of the paper within the host volume: date and page range. -->
    <part>
      <date>2023-09</date>
      <extent unit="page">
        <start>60</start>
        <end>68</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Bhojpuri WordNet: Problems in Translating Hindi Synsets into Bhojpuri
%A Ali, Imran
%A Gatla, Praveen
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing
%D 2023
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F ali-gatla-2023-bhojpuri
%X Today, artificial intelligence systems are incredibly intelligent, however they lack the human like capacity for understanding. In this context, sense-based lexical resources become a requirement for artificially intelligent machines. Lexical resources like Wordnets have received scholarly attention because they are considered as the crucial sense-based resources in the field of natural language understanding. They can help in knowing the intended meaning of the communicated texts, as they are focused on the concept rather than the words. Wordnets are available only for 18 Indian languages. Keeping this in mind, we have initiated the development of a comprehensive wordnet for Bhojpuri. The present paper describes the creation of the synsets of Bhojpuri and discusses the problems that we faced while translating Hindi synsets into Bhojpuri. They are lexical anomalies, lexical mismatch words, synthesized forms, lack of technical words etc. Nearly 4000 Hindi synsets were mapped for their equivalent synsets in Bhojpuri following the expansion approach. We have also worked on the language-specific synsets, which are unique to Bhojpuri. This resource is useful in machine translation, sentiment analysis, word sense disambiguation, cross-lingual references among Indian languages, and Bhojpuri language teaching and learning.
%U https://aclanthology.org/2023.ranlp-1.7
%P 60-68
Markdown (Informal)
[Bhojpuri WordNet: Problems in Translating Hindi Synsets into Bhojpuri](https://aclanthology.org/2023.ranlp-1.7) (Ali & Gatla, RANLP 2023)
ACL