@inproceedings{das-singh-2021-image,
title = "Image Caption Generation Framework for {A}ssamese News using Attention Mechanism",
author = "Das, Ringki and
Singh, Thoudam Doren",
booktitle = "Proceedings of the 18th International Conference on Natural Language Processing (ICON)",
month = dec,
year = "2021",
address = "National Institute of Technology Silchar, Silchar, India",
publisher = "NLP Association of India (NLPAI)",
url = "https://aclanthology.org/2021.icon-main.28",
pages = "231--239",
abstract = "Automatic caption generation is an artificial intelligence problem that falls at the intersection of computer vision and natural language processing. Although significant works have been reported in image captioning, the contribution is limited to English and few major languages with sufficient resources. But, no work on image captioning has been reported in a resource-constrained language like Assamese. With this inspiration, we propose an encoder-decoder based framework for image caption generation in the Assamese news domain. The VGG-16 pre-trained model at the encoder side and LSTM with an attention mechanism are employed at the decoder side to generate the Assamese caption. We train the proposed model on the dataset built in-house consisting of 10,000 images with a single caption for each image. We describe our experimental methodology, quantitative and qualitative results which validate the effectiveness of our model for caption generation. The proposed model shows a BLEU score of 12.1 outperforming the baseline model.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="das-singh-2021-image">
<titleInfo>
<title>Image Caption Generation Framework for Assamese News using Attention Mechanism</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ringki</namePart>
<namePart type="family">Das</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thoudam</namePart>
<namePart type="given">Doren</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th International Conference on Natural Language Processing (ICON)</title>
</titleInfo>
<originInfo>
<publisher>NLP Association of India (NLPAI)</publisher>
<place>
<placeTerm type="text">National Institute of Technology Silchar, Silchar, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Automatic caption generation is an artificial intelligence problem that falls at the intersection of computer vision and natural language processing. Although significant works have been reported in image captioning, the contribution is limited to English and few major languages with sufficient resources. But, no work on image captioning has been reported in a resource-constrained language like Assamese. With this inspiration, we propose an encoder-decoder based framework for image caption generation in the Assamese news domain. The VGG-16 pre-trained model at the encoder side and LSTM with an attention mechanism are employed at the decoder side to generate the Assamese caption. We train the proposed model on the dataset built in-house consisting of 10,000 images with a single caption for each image. We describe our experimental methodology, quantitative and qualitative results which validate the effectiveness of our model for caption generation. The proposed model shows a BLEU score of 12.1 outperforming the baseline model.</abstract>
<identifier type="citekey">das-singh-2021-image</identifier>
<location>
<url>https://aclanthology.org/2021.icon-main.28</url>
</location>
<part>
<date>2021-12</date>
<extent unit="page">
<start>231</start>
<end>239</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Image Caption Generation Framework for Assamese News using Attention Mechanism
%A Das, Ringki
%A Singh, Thoudam Doren
%S Proceedings of the 18th International Conference on Natural Language Processing (ICON)
%D 2021
%8 December
%I NLP Association of India (NLPAI)
%C National Institute of Technology Silchar, Silchar, India
%F das-singh-2021-image
%X Automatic caption generation is an artificial intelligence problem that falls at the intersection of computer vision and natural language processing. Although significant works have been reported in image captioning, the contribution is limited to English and few major languages with sufficient resources. But, no work on image captioning has been reported in a resource-constrained language like Assamese. With this inspiration, we propose an encoder-decoder based framework for image caption generation in the Assamese news domain. The VGG-16 pre-trained model at the encoder side and LSTM with an attention mechanism are employed at the decoder side to generate the Assamese caption. We train the proposed model on the dataset built in-house consisting of 10,000 images with a single caption for each image. We describe our experimental methodology, quantitative and qualitative results which validate the effectiveness of our model for caption generation. The proposed model shows a BLEU score of 12.1 outperforming the baseline model.
%U https://aclanthology.org/2021.icon-main.28
%P 231-239
Markdown (Informal)
[Image Caption Generation Framework for Assamese News using Attention Mechanism](https://aclanthology.org/2021.icon-main.28) (Das & Singh, ICON 2021)
ACL