@inproceedings{wu-smith-2023-composition,
title = "Composition and Deformance: Measuring Imageability with a Text-to-Image Model",
author = "Wu, Si and
Smith, David",
editor = "Akoury, Nader and
Clark, Elizabeth and
Iyyer, Mohit and
Chaturvedi, Snigdha and
Brahman, Faeze and
Chandu, Khyathi",
booktitle = "Proceedings of the 5th Workshop on Narrative Understanding",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.wnu-1.16",
doi = "10.18653/v1/2023.wnu-1.16",
pages = "106--117",
abstract = "Although psycholinguists and psychologists have long studied the tendency of linguistic strings to evoke mental images in hearers or readers, most computational studies have applied this concept of imageability only to isolated words. Using recent developments in text-to-image generation models, such as DALLE mini, we propose computational methods that use generated images to measure the imageability of both single English words and connected text. We sample text prompts for image generation from three corpora: human-generated image captions, news article sentences, and poem lines. We subject these prompts to different deformances to examine the model{'}s ability to detect changes in imageability caused by compositional change. We find high correlation between the proposed computational measures of imageability and human judgments of individual words. We also find the proposed measures more consistently respond to changes in compositionality than baseline approaches. We discuss possible effects of model training and implications for the study of compositionality in text-to-image models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wu-smith-2023-composition">
    <titleInfo>
        <title>Composition and Deformance: Measuring Imageability with a Text-to-Image Model</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Si</namePart>
        <namePart type="family">Wu</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Smith</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2023-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the 5th Workshop on Narrative Understanding</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Nader</namePart>
            <namePart type="family">Akoury</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Elizabeth</namePart>
            <namePart type="family">Clark</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Mohit</namePart>
            <namePart type="family">Iyyer</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Snigdha</namePart>
            <namePart type="family">Chaturvedi</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Faeze</namePart>
            <namePart type="family">Brahman</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Khyathi</namePart>
            <namePart type="family">Chandu</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">Toronto, Canada</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Although psycholinguists and psychologists have long studied the tendency of linguistic strings to evoke mental images in hearers or readers, most computational studies have applied this concept of imageability only to isolated words. Using recent developments in text-to-image generation models, such as DALLE mini, we propose computational methods that use generated images to measure the imageability of both single English words and connected text. We sample text prompts for image generation from three corpora: human-generated image captions, news article sentences, and poem lines. We subject these prompts to different deformances to examine the model’s ability to detect changes in imageability caused by compositional change. We find high correlation between the proposed computational measures of imageability and human judgments of individual words. We also find the proposed measures more consistently respond to changes in compositionality than baseline approaches. We discuss possible effects of model training and implications for the study of compositionality in text-to-image models.</abstract>
    <identifier type="citekey">wu-smith-2023-composition</identifier>
    <identifier type="doi">10.18653/v1/2023.wnu-1.16</identifier>
    <location>
        <url>https://aclanthology.org/2023.wnu-1.16</url>
    </location>
    <part>
        <date>2023-07</date>
        <extent unit="page">
            <start>106</start>
            <end>117</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Composition and Deformance: Measuring Imageability with a Text-to-Image Model
%A Wu, Si
%A Smith, David
%Y Akoury, Nader
%Y Clark, Elizabeth
%Y Iyyer, Mohit
%Y Chaturvedi, Snigdha
%Y Brahman, Faeze
%Y Chandu, Khyathi
%S Proceedings of the 5th Workshop on Narrative Understanding
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F wu-smith-2023-composition
%X Although psycholinguists and psychologists have long studied the tendency of linguistic strings to evoke mental images in hearers or readers, most computational studies have applied this concept of imageability only to isolated words. Using recent developments in text-to-image generation models, such as DALLE mini, we propose computational methods that use generated images to measure the imageability of both single English words and connected text. We sample text prompts for image generation from three corpora: human-generated image captions, news article sentences, and poem lines. We subject these prompts to different deformances to examine the model’s ability to detect changes in imageability caused by compositional change. We find high correlation between the proposed computational measures of imageability and human judgments of individual words. We also find the proposed measures more consistently respond to changes in compositionality than baseline approaches. We discuss possible effects of model training and implications for the study of compositionality in text-to-image models.
%R 10.18653/v1/2023.wnu-1.16
%U https://aclanthology.org/2023.wnu-1.16
%U https://doi.org/10.18653/v1/2023.wnu-1.16
%P 106-117
Markdown (Informal)
[Composition and Deformance: Measuring Imageability with a Text-to-Image Model](https://aclanthology.org/2023.wnu-1.16) (Wu & Smith, WNU 2023)
ACL
Si Wu and David Smith. 2023. Composition and Deformance: Measuring Imageability with a Text-to-Image Model. In Proceedings of the 5th Workshop on Narrative Understanding, pages 106–117, Toronto, Canada. Association for Computational Linguistics.