% Conference paper in the ICON 2021 proceedings; the url field points at ACL Anthology record 2021.icon-main.30.
% The proper noun in the title is brace-protected as a whole word so sentence-casing styles keep its capital.
@inproceedings{mishra-etal-2021-scaled,
  title     = {A Scaled Encoder Decoder Network for Image Captioning in {Hindi}},
  author    = {Mishra, Santosh Kumar and
               Saha, Sriparna and
               Bhattacharyya, Pushpak},
  editor    = {Bandyopadhyay, Sivaji and
               Devi, Sobha Lalitha and
               Bhattacharyya, Pushpak},
  booktitle = {Proceedings of the 18th International Conference on Natural Language Processing (ICON)},
  month     = dec,
  year      = {2021},
  address   = {National Institute of Technology Silchar, Silchar, India},
  publisher = {NLP Association of India (NLPAI)},
  url       = {https://aclanthology.org/2021.icon-main.30},
  pages     = {251--260},
  abstract  = {Image captioning is a prominent research area in computer vision and natural language processing, which automatically generates natural language descriptions for images. Most of the existing works have focused on developing models for image captioning in the English language. The current paper introduces a novel deep learning architecture based on encoder-decoder with an attention mechanism for image captioning in the Hindi language. For encoder, decoder, and attention, several deep learning-based architectures have been explored. Hindi, the fourth-most spoken language globally, is widely spoken in India and South Asia and is one of India{'}s official languages. The proposed encoder-decoder architecture utilizes scaling in convolution neural networks to achieve better accuracy than state-of-the-art image captioning methods in Hindi. The proposed method{'}s performance is compared with state-of-the-art methods in terms of BLEU scores and manual evaluation (in terms of adequacy and fluency). The obtained results demonstrate the efficacy of the proposed method.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single bibliographic record. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="mishra-etal-2021-scaled">
    <!-- Title of the paper itself. -->
    <titleInfo>
      <title>A Scaled Encoder Decoder Network for Image Captioning in Hindi</title>
    </titleInfo>
    <!-- Authors, in listed order. -->
    <name type="personal">
      <namePart type="given">Santosh</namePart>
      <namePart type="given">Kumar</namePart>
      <namePart type="family">Mishra</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sriparna</namePart>
      <namePart type="family">Saha</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pushpak</namePart>
      <namePart type="family">Bhattacharyya</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher details. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 18th International Conference on Natural Language Processing (ICON)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Sivaji</namePart>
        <namePart type="family">Bandyopadhyay</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sobha</namePart>
        <namePart type="given">Lalitha</namePart>
        <namePart type="family">Devi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Pushpak</namePart>
        <namePart type="family">Bhattacharyya</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>NLP Association of India (NLPAI)</publisher>
        <place>
          <placeTerm type="text">National Institute of Technology Silchar, Silchar, India</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Image captioning is a prominent research area in computer vision and natural language processing, which automatically generates natural language descriptions for images. Most of the existing works have focused on developing models for image captioning in the English language. The current paper introduces a novel deep learning architecture based on encoder-decoder with an attention mechanism for image captioning in the Hindi language. For encoder, decoder, and attention, several deep learning-based architectures have been explored. Hindi, the fourth-most spoken language globally, is widely spoken in India and South Asia and is one of India’s official languages. The proposed encoder-decoder architecture utilizes scaling in convolution neural networks to achieve better accuracy than state-of-the-art image captioning methods in Hindi. The proposed method’s performance is compared with state-of-the-art methods in terms of BLEU scores and manual evaluation (in terms of adequacy and fluency). The obtained results demonstrate the efficacy of the proposed method.</abstract>
    <!-- The citekey value matches the ID attribute on the mods element. -->
    <identifier type="citekey">mishra-etal-2021-scaled</identifier>
    <location>
      <url>https://aclanthology.org/2021.icon-main.30</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2021-12</date>
      <extent unit="page">
        <start>251</start>
        <end>260</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A Scaled Encoder Decoder Network for Image Captioning in Hindi
%A Mishra, Santosh Kumar
%A Saha, Sriparna
%A Bhattacharyya, Pushpak
%Y Bandyopadhyay, Sivaji
%Y Devi, Sobha Lalitha
%Y Bhattacharyya, Pushpak
%S Proceedings of the 18th International Conference on Natural Language Processing (ICON)
%D 2021
%8 December
%I NLP Association of India (NLPAI)
%C National Institute of Technology Silchar, Silchar, India
%F mishra-etal-2021-scaled
%X Image captioning is a prominent research area in computer vision and natural language processing, which automatically generates natural language descriptions for images. Most of the existing works have focused on developing models for image captioning in the English language. The current paper introduces a novel deep learning architecture based on encoder-decoder with an attention mechanism for image captioning in the Hindi language. For encoder, decoder, and attention, several deep learning-based architectures have been explored. Hindi, the fourth-most spoken language globally, is widely spoken in India and South Asia and is one of India’s official languages. The proposed encoder-decoder architecture utilizes scaling in convolution neural networks to achieve better accuracy than state-of-the-art image captioning methods in Hindi. The proposed method’s performance is compared with state-of-the-art methods in terms of BLEU scores and manual evaluation (in terms of adequacy and fluency). The obtained results demonstrate the efficacy of the proposed method.
%U https://aclanthology.org/2021.icon-main.30
%P 251-260
Markdown (Informal)
[A Scaled Encoder Decoder Network for Image Captioning in Hindi](https://aclanthology.org/2021.icon-main.30) (Mishra et al., ICON 2021)
ACL
- Santosh Kumar Mishra, Sriparna Saha, and Pushpak Bhattacharyya. 2021. A Scaled Encoder Decoder Network for Image Captioning in Hindi. In Proceedings of the 18th International Conference on Natural Language Processing (ICON), pages 251–260, National Institute of Technology Silchar, Silchar, India. NLP Association of India (NLPAI).