@inproceedings{xu-cheng-2023-spontaneous,
title = "Spontaneous gestures encoded by hand positions improve language models: An Information-Theoretic motivated study",
author = "Xu, Yang and
Cheng, Yang",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-acl.600",
doi = "10.18653/v1/2023.findings-acl.600",
pages = "9409--9424",
abstract = "The multi-modality nature of human communication has been utilized to enhance the performance of language modeling-related tasks. Driven by the development of large-scale end-to-end learning techniques and the availability of multi-modal data, it becomes possible to represent non-verbal communication behaviors through joint-learning, and directly study their interaction with verbal communication. However, there is still gaps in existing studies to better address the underlying mechanism of how non-verbal expression contributes to the overall communication purpose. Therefore, we explore two questions using mixed-modal language models trained against monologue video data: first, whether incorporating gesture representations can improve the language model{'}s performance (perplexity); second, whether spontaneous gestures demonstrate entropy rate constancy (ERC), which is an empirical pattern found in most verbal language data that supports the rational communication assumption from Information Theory. We have positive and interesting findings for both questions: speakers indeed use spontaneous gestures to convey {``}meaningful{''} information that enhances verbal communication, which can be captured with a simple spatial encoding scheme. More importantly, gestures are produced and organized rationally in a similar way as words, which optimizes the communication efficiency.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="xu-cheng-2023-spontaneous">
<titleInfo>
<title>Spontaneous gestures encoded by hand positions improve language models: An Information-Theoretic motivated study</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Cheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Boyd-Graber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naoaki</namePart>
<namePart type="family">Okazaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The multi-modal nature of human communication has been utilized to enhance the performance of language modeling-related tasks. Driven by the development of large-scale end-to-end learning techniques and the availability of multi-modal data, it has become possible to represent non-verbal communication behaviors through joint learning and to study their interaction with verbal communication directly. However, there are still gaps in existing studies in addressing the underlying mechanism of how non-verbal expression contributes to the overall communication purpose. Therefore, we explore two questions using mixed-modal language models trained on monologue video data: first, whether incorporating gesture representations can improve the language model’s performance (perplexity); second, whether spontaneous gestures demonstrate entropy rate constancy (ERC), an empirical pattern found in most verbal language data that supports the rational communication assumption from Information Theory. We have positive and interesting findings for both questions: speakers indeed use spontaneous gestures to convey “meaningful” information that enhances verbal communication, and this information can be captured with a simple spatial encoding scheme. More importantly, gestures are produced and organized rationally in a similar way to words, which optimizes communication efficiency.</abstract>
<identifier type="citekey">xu-cheng-2023-spontaneous</identifier>
<identifier type="doi">10.18653/v1/2023.findings-acl.600</identifier>
<location>
<url>https://aclanthology.org/2023.findings-acl.600</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>9409</start>
<end>9424</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Spontaneous gestures encoded by hand positions improve language models: An Information-Theoretic motivated study
%A Xu, Yang
%A Cheng, Yang
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Findings of the Association for Computational Linguistics: ACL 2023
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F xu-cheng-2023-spontaneous
%X The multi-modal nature of human communication has been utilized to enhance the performance of language modeling-related tasks. Driven by the development of large-scale end-to-end learning techniques and the availability of multi-modal data, it has become possible to represent non-verbal communication behaviors through joint learning and to study their interaction with verbal communication directly. However, there are still gaps in existing studies in addressing the underlying mechanism of how non-verbal expression contributes to the overall communication purpose. Therefore, we explore two questions using mixed-modal language models trained on monologue video data: first, whether incorporating gesture representations can improve the language model’s performance (perplexity); second, whether spontaneous gestures demonstrate entropy rate constancy (ERC), an empirical pattern found in most verbal language data that supports the rational communication assumption from Information Theory. We have positive and interesting findings for both questions: speakers indeed use spontaneous gestures to convey “meaningful” information that enhances verbal communication, and this information can be captured with a simple spatial encoding scheme. More importantly, gestures are produced and organized rationally in a similar way to words, which optimizes communication efficiency.
%R 10.18653/v1/2023.findings-acl.600
%U https://aclanthology.org/2023.findings-acl.600
%U https://doi.org/10.18653/v1/2023.findings-acl.600
%P 9409-9424
Markdown (Informal)
[Spontaneous gestures encoded by hand positions improve language models: An Information-Theoretic motivated study](https://aclanthology.org/2023.findings-acl.600) (Xu & Cheng, Findings 2023)
ACL
Yang Xu and Yang Cheng. 2023. Spontaneous gestures encoded by hand positions improve language models: An Information-Theoretic motivated study. In Findings of the Association for Computational Linguistics: ACL 2023, pages 9409–9424, Toronto, Canada. Association for Computational Linguistics.