@inproceedings{parcalabescu-etal-2021-multimodality,
  title     = {What is Multimodality?},
  author    = {Parcalabescu, Letitia and Trost, Nils and Frank, Anette},
  editor    = {Donatelli, Lucia and Krishnaswamy, Nikhil and Lai, Kenneth and Pustejovsky, James},
  booktitle = {Proceedings of the 1st Workshop on Multimodal Semantic Representations (MMSR)},
  month     = jun,
  year      = {2021},
  address   = {Groningen, Netherlands (Online)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.mmsr-1.1},
  pages     = {1--10},
  abstract  = {The last years have shown rapid developments in the field of multimodal machine learning, combining e.g., vision, text or speech. In this position paper we explain how the field uses outdated definitions of multimodality that prove unfit for the machine learning era. We propose a new task-relative definition of (multi)modality in the context of multimodal machine learning that focuses on representations and information that are relevant for a given machine learning task. With our new definition of multimodality we aim to provide a missing foundation for multimodal research, an important component of language grounding and a crucial milestone towards NLU.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="parcalabescu-etal-2021-multimodality">
<titleInfo>
<title>What is Multimodality?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Letitia</namePart>
<namePart type="family">Parcalabescu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nils</namePart>
<namePart type="family">Trost</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anette</namePart>
<namePart type="family">Frank</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Multimodal Semantic Representations (MMSR)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lucia</namePart>
<namePart type="family">Donatelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikhil</namePart>
<namePart type="family">Krishnaswamy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kenneth</namePart>
<namePart type="family">Lai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Pustejovsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Groningen, Netherlands (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The last years have shown rapid developments in the field of multimodal machine learning, combining e.g., vision, text or speech. In this position paper we explain how the field uses outdated definitions of multimodality that prove unfit for the machine learning era. We propose a new task-relative definition of (multi)modality in the context of multimodal machine learning that focuses on representations and information that are relevant for a given machine learning task. With our new definition of multimodality we aim to provide a missing foundation for multimodal research, an important component of language grounding and a crucial milestone towards NLU.</abstract>
<identifier type="citekey">parcalabescu-etal-2021-multimodality</identifier>
<location>
<url>https://aclanthology.org/2021.mmsr-1.1</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>1</start>
<end>10</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T What is Multimodality?
%A Parcalabescu, Letitia
%A Trost, Nils
%A Frank, Anette
%Y Donatelli, Lucia
%Y Krishnaswamy, Nikhil
%Y Lai, Kenneth
%Y Pustejovsky, James
%S Proceedings of the 1st Workshop on Multimodal Semantic Representations (MMSR)
%D 2021
%8 June
%I Association for Computational Linguistics
%C Groningen, Netherlands (Online)
%F parcalabescu-etal-2021-multimodality
%X The last years have shown rapid developments in the field of multimodal machine learning, combining e.g., vision, text or speech. In this position paper we explain how the field uses outdated definitions of multimodality that prove unfit for the machine learning era. We propose a new task-relative definition of (multi)modality in the context of multimodal machine learning that focuses on representations and information that are relevant for a given machine learning task. With our new definition of multimodality we aim to provide a missing foundation for multimodal research, an important component of language grounding and a crucial milestone towards NLU.
%U https://aclanthology.org/2021.mmsr-1.1
%P 1-10
Markdown (Informal)
[What is Multimodality?](https://aclanthology.org/2021.mmsr-1.1) (Parcalabescu et al., MMSR 2021)
ACL
- Letitia Parcalabescu, Nils Trost, and Anette Frank. 2021. What is Multimodality? In Proceedings of the 1st Workshop on Multimodal Semantic Representations (MMSR), pages 1–10, Groningen, Netherlands (Online). Association for Computational Linguistics.