@inproceedings{zhong-miyao-2021-leveraging,
title = "Leveraging Partial Dependency Trees to Control Image Captions",
author = "Zhong, Wenjie and
Miyao, Yusuke",
editor = "{Xin} and
Hu, Ronghang and
Hudson, Drew and
Fu, Tsu-Jui and
Rohrbach, Marcus and
Fried, Daniel",
booktitle = "Proceedings of the Second Workshop on Advances in Language and Vision Research",
month = jun,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.alvr-1.3",
doi = "10.18653/v1/2021.alvr-1.3",
pages = "16--21",
abstract = "Controlling the generation of image captions attracts lots of attention recently. In this paper, we propose a framework leveraging partial syntactic dependency trees as control signals to make image captions include specified words and their syntactic structures. To achieve this purpose, we propose a Syntactic Dependency Structure Aware Model (SDSAM), which explicitly learns to generate the syntactic structures of image captions to include given partial dependency trees. In addition, we come up with a metric to evaluate how many specified words and their syntactic dependencies are included in generated captions. We carry out experiments on two standard datasets: Microsoft COCO and Flickr30k. Empirical results show that image captions generated by our model are effectively controlled in terms of specified words and their syntactic structures. The code is available on GitHub.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhong-miyao-2021-leveraging">
<titleInfo>
<title>Leveraging Partial Dependency Trees to Control Image Captions</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wenjie</namePart>
<namePart type="family">Zhong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yusuke</namePart>
<namePart type="family">Miyao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Advances in Language and Vision Research</title>
</titleInfo>
<name>
<namePart>Xin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ronghang</namePart>
<namePart type="family">Hu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Drew</namePart>
<namePart type="family">Hudson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tsu-Jui</namePart>
<namePart type="family">Fu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcus</namePart>
<namePart type="family">Rohrbach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Fried</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Controlling the generation of image captions attracts lots of attention recently. In this paper, we propose a framework leveraging partial syntactic dependency trees as control signals to make image captions include specified words and their syntactic structures. To achieve this purpose, we propose a Syntactic Dependency Structure Aware Model (SDSAM), which explicitly learns to generate the syntactic structures of image captions to include given partial dependency trees. In addition, we come up with a metric to evaluate how many specified words and their syntactic dependencies are included in generated captions. We carry out experiments on two standard datasets: Microsoft COCO and Flickr30k. Empirical results show that image captions generated by our model are effectively controlled in terms of specified words and their syntactic structures. The code is available on GitHub.</abstract>
<identifier type="citekey">zhong-miyao-2021-leveraging</identifier>
<identifier type="doi">10.18653/v1/2021.alvr-1.3</identifier>
<location>
<url>https://aclanthology.org/2021.alvr-1.3</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>16</start>
<end>21</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Leveraging Partial Dependency Trees to Control Image Captions
%A Zhong, Wenjie
%A Miyao, Yusuke
%Y Hu, Ronghang
%Y Hudson, Drew
%Y Fu, Tsu-Jui
%Y Rohrbach, Marcus
%Y Fried, Daniel
%E Xin
%S Proceedings of the Second Workshop on Advances in Language and Vision Research
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F zhong-miyao-2021-leveraging
%X Controlling the generation of image captions attracts lots of attention recently. In this paper, we propose a framework leveraging partial syntactic dependency trees as control signals to make image captions include specified words and their syntactic structures. To achieve this purpose, we propose a Syntactic Dependency Structure Aware Model (SDSAM), which explicitly learns to generate the syntactic structures of image captions to include given partial dependency trees. In addition, we come up with a metric to evaluate how many specified words and their syntactic dependencies are included in generated captions. We carry out experiments on two standard datasets: Microsoft COCO and Flickr30k. Empirical results show that image captions generated by our model are effectively controlled in terms of specified words and their syntactic structures. The code is available on GitHub.
%R 10.18653/v1/2021.alvr-1.3
%U https://aclanthology.org/2021.alvr-1.3
%U https://doi.org/10.18653/v1/2021.alvr-1.3
%P 16-21
Markdown (Informal)
[Leveraging Partial Dependency Trees to Control Image Captions](https://aclanthology.org/2021.alvr-1.3) (Zhong & Miyao, ALVR 2021)
ACL