@inproceedings{nie-etal-2020-pragmatic,
title = "Pragmatic Issue-Sensitive Image Captioning",
author = "Nie, Allen and
Cohn-Gordon, Reuben and
Potts, Christopher",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2020",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.findings-emnlp.173",
doi = "10.18653/v1/2020.findings-emnlp.173",
pages = "1924--1938",
abstract = "Image captioning systems need to produce texts that are not only true but also relevant in that they are properly aligned with the current issues. For instance, in a newspaper article about a sports event, a caption that not only identifies the player in a picture but also comments on their ethnicity could create unwanted reader reactions. To address this, we propose Issue-Sensitive Image Captioning (ISIC). In ISIC, the captioner is given a target image and an issue, which is a set of images partitioned in a way that specifies what information is relevant. For the sports article, we could construct a partition that places images into equivalence classes based on player position. To model this task, we use an extension of the Rational Speech Acts model. Our extension is built on top of state-of-the-art pretrained neural image captioners and explicitly uses image partitions to control caption generation. In both automatic and human evaluations, we show that these models generate captions that are descriptive and issue-sensitive. Finally, we show how ISIC can complement and enrich the related task of Visual Question Answering.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nie-etal-2020-pragmatic">
<titleInfo>
<title>Pragmatic Issue-Sensitive Image Captioning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Allen</namePart>
<namePart type="family">Nie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reuben</namePart>
<namePart type="family">Cohn-Gordon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Potts</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2020</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Image captioning systems need to produce texts that are not only true but also relevant in that they are properly aligned with the current issues. For instance, in a newspaper article about a sports event, a caption that not only identifies the player in a picture but also comments on their ethnicity could create unwanted reader reactions. To address this, we propose Issue-Sensitive Image Captioning (ISIC). In ISIC, the captioner is given a target image and an issue, which is a set of images partitioned in a way that specifies what information is relevant. For the sports article, we could construct a partition that places images into equivalence classes based on player position. To model this task, we use an extension of the Rational Speech Acts model. Our extension is built on top of state-of-the-art pretrained neural image captioners and explicitly uses image partitions to control caption generation. In both automatic and human evaluations, we show that these models generate captions that are descriptive and issue-sensitive. Finally, we show how ISIC can complement and enrich the related task of Visual Question Answering.</abstract>
<identifier type="citekey">nie-etal-2020-pragmatic</identifier>
<identifier type="doi">10.18653/v1/2020.findings-emnlp.173</identifier>
<location>
<url>https://aclanthology.org/2020.findings-emnlp.173</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>1924</start>
<end>1938</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Pragmatic Issue-Sensitive Image Captioning
%A Nie, Allen
%A Cohn-Gordon, Reuben
%A Potts, Christopher
%S Findings of the Association for Computational Linguistics: EMNLP 2020
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F nie-etal-2020-pragmatic
%X Image captioning systems need to produce texts that are not only true but also relevant in that they are properly aligned with the current issues. For instance, in a newspaper article about a sports event, a caption that not only identifies the player in a picture but also comments on their ethnicity could create unwanted reader reactions. To address this, we propose Issue-Sensitive Image Captioning (ISIC). In ISIC, the captioner is given a target image and an issue, which is a set of images partitioned in a way that specifies what information is relevant. For the sports article, we could construct a partition that places images into equivalence classes based on player position. To model this task, we use an extension of the Rational Speech Acts model. Our extension is built on top of state-of-the-art pretrained neural image captioners and explicitly uses image partitions to control caption generation. In both automatic and human evaluations, we show that these models generate captions that are descriptive and issue-sensitive. Finally, we show how ISIC can complement and enrich the related task of Visual Question Answering.
%R 10.18653/v1/2020.findings-emnlp.173
%U https://aclanthology.org/2020.findings-emnlp.173
%U https://doi.org/10.18653/v1/2020.findings-emnlp.173
%P 1924-1938
Markdown (Informal)
[Pragmatic Issue-Sensitive Image Captioning](https://aclanthology.org/2020.findings-emnlp.173) (Nie et al., Findings 2020)
ACL
- Allen Nie, Reuben Cohn-Gordon, and Christopher Potts. 2020. Pragmatic Issue-Sensitive Image Captioning. In Findings of the Association for Computational Linguistics: EMNLP 2020, pages 1924–1938, Online. Association for Computational Linguistics.