% ACL Anthology record 2025.isa-1.10: a paper in the ISA-21 workshop proceedings.
% Names are stored in "Family, Given" form; the editor field names the volume editor.
@inproceedings{gamonal-etal-2025-audition,
  title     = {Audition: A Frame-Annotated Multimodal Dataset for Accessible Audiovisual Content},
  author    = {Gamonal, Maucha Andrade and
               Torrent, Tiago Timponi and
               Matos, Ely Edison and
               Pagano, Adriana S. and
               Belcavello, Frederico and
               Mayer, Fl{\'a}via Affonso and
               Lorenzi, Arthur and
               Sigiliano, Natalia S. and
               Abreu, Helen de Andrade and
               Dutra, L{\'i}via Vicente and
               Viridiano, Marcelo and
               Coneglian, Andr{\'e} and
               Herbst, Victor A. S. and
               Campos, Franciany O. and
               Brown, Kenneth and
               Ruiz, L{\'i}via Padua and
               Bonoto, Lisandra Carvalho and
               Pereira, Luiz Fernando and
               Navarro, Yulla Liquer},
  editor    = {Bunt, Harry},
  booktitle = {Proceedings of the 21st Joint ACL - ISO Workshop on Interoperable Semantic Annotation (ISA-21)},
  month     = sep,
  year      = {2025},
  address   = {D{\"u}sseldorf, Germany},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.isa-1.10/},
  isbn      = {979-8-89176-319-7},
  abstract  = {This paper presents a multimodal semantic analysis of accessible Brazilian short films using a frame-based annotation approach. We introduce a subset of the Audition dataset, comprising six short films from the animation and documentary genres. We analysed three communicative modes: original audio, audio description, and visual content. Trained annotators semantically annotated each mode following the FrameNet Brazil multimodal methodology. To compare meaning across modalities, we used cosine similarity over frame-semantic representations. Results show that audio description aligns more closely with video content than original audio, reflecting its role in translating visual meaning into language. Our findings demonstrate the effectiveness of frame semantics in modelling meaning across modalities and provide quantitative evidence of audio description as a bridge between visual and verbal communication. The dataset and annotation strategies are a valuable resource for research on multimodal representation, semantic similarity, and accessible media.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS v3 record for the paper at https://aclanthology.org/2025.isa-1.10/.
       Top-level <name> elements are the paper's authors; the <relatedItem type="host">
       element describes the proceedings volume the paper appears in. -->
  <mods ID="gamonal-etal-2025-audition">
    <titleInfo>
      <title>Audition: A Frame-Annotated Multimodal Dataset for Accessible Audiovisual Content</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Maucha</namePart>
      <namePart type="given">Andrade</namePart>
      <namePart type="family">Gamonal</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tiago</namePart>
      <namePart type="given">Timponi</namePart>
      <namePart type="family">Torrent</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ely</namePart>
      <namePart type="given">Edison</namePart>
      <namePart type="family">Matos</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Adriana</namePart>
      <namePart type="given">S</namePart>
      <namePart type="family">Pagano</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Frederico</namePart>
      <namePart type="family">Belcavello</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Flávia</namePart>
      <namePart type="given">Affonso</namePart>
      <namePart type="family">Mayer</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Arthur</namePart>
      <namePart type="family">Lorenzi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Natalia</namePart>
      <namePart type="given">S</namePart>
      <namePart type="family">Sigiliano</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Helen</namePart>
      <namePart type="given">de</namePart>
      <namePart type="given">Andrade</namePart>
      <namePart type="family">Abreu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Lívia</namePart>
      <namePart type="given">Vicente</namePart>
      <namePart type="family">Dutra</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Marcelo</namePart>
      <namePart type="family">Viridiano</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">André</namePart>
      <namePart type="family">Coneglian</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Victor</namePart>
      <namePart type="given">A</namePart>
      <namePart type="given">S</namePart>
      <namePart type="family">Herbst</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Franciany</namePart>
      <namePart type="given">O</namePart>
      <namePart type="family">Campos</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kenneth</namePart>
      <namePart type="family">Brown</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Lívia</namePart>
      <namePart type="given">Padua</namePart>
      <namePart type="family">Ruiz</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Lisandra</namePart>
      <namePart type="given">Carvalho</namePart>
      <namePart type="family">Bonoto</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Luiz</namePart>
      <namePart type="given">Fernando</namePart>
      <namePart type="family">Pereira</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yulla</namePart>
      <namePart type="given">Liquer</namePart>
      <namePart type="family">Navarro</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 21st Joint ACL - ISO Workshop on Interoperable Semantic Annotation (ISA-21)</title>
      </titleInfo>
      <!-- Volume editor: given name "Harry", family name "Bunt". -->
      <name type="personal">
        <namePart type="given">Harry</namePart>
        <namePart type="family">Bunt</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Düsseldorf, Germany</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-319-7</identifier>
    </relatedItem>
    <abstract>This paper presents a multimodal semantic analysis of accessible Brazilian short films using a frame-based annotation approach. We introduce a subset of the Audition dataset, comprising six short films from the animation and documentary genres. We analysed three communicative modes: original audio, audio description, and visual content. Trained annotators semantically annotated each mode following the FrameNet Brazil multimodal methodology. To compare meaning across modalities, we used cosine similarity over frame-semantic representations. Results show that audio description aligns more closely with video content than original audio, reflecting its role in translating visual meaning into language. Our findings demonstrate the effectiveness of frame semantics in modelling meaning across modalities and provide quantitative evidence of audio description as a bridge between visual and verbal communication. The dataset and annotation strategies are a valuable resource for research on multimodal representation, semantic similarity, and accessible media.</abstract>
    <identifier type="citekey">gamonal-etal-2025-audition</identifier>
    <location>
      <url>https://aclanthology.org/2025.isa-1.10/</url>
    </location>
    <part>
      <date>2025-09</date>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Audition: A Frame-Annotated Multimodal Dataset for Accessible Audiovisual Content
%A Gamonal, Maucha Andrade
%A Torrent, Tiago Timponi
%A Matos, Ely Edison
%A Pagano, Adriana S.
%A Belcavello, Frederico
%A Mayer, Flávia Affonso
%A Lorenzi, Arthur
%A Sigiliano, Natalia S.
%A Abreu, Helen de Andrade
%A Dutra, Lívia Vicente
%A Viridiano, Marcelo
%A Coneglian, André
%A Herbst, Victor A. S.
%A Campos, Franciany O.
%A Brown, Kenneth
%A Ruiz, Lívia Padua
%A Bonoto, Lisandra Carvalho
%A Pereira, Luiz Fernando
%A Navarro, Yulla Liquer
%Y Bunt, Harry
%S Proceedings of the 21st Joint ACL - ISO Workshop on Interoperable Semantic Annotation (ISA-21)
%D 2025
%8 September
%I Association for Computational Linguistics
%C Düsseldorf, Germany
%@ 979-8-89176-319-7
%F gamonal-etal-2025-audition
%X This paper presents a multimodal semantic analysis of accessible Brazilian short films using a frame-based annotation approach. We introduce a subset of the Audition dataset, comprising six short films from the animation and documentary genres. We analysed three communicative modes: original audio, audio description, and visual content. Trained annotators semantically annotated each mode following the FrameNet Brazil multimodal methodology. To compare meaning across modalities, we used cosine similarity over frame-semantic representations. Results show that audio description aligns more closely with video content than original audio, reflecting its role in translating visual meaning into language. Our findings demonstrate the effectiveness of frame semantics in modelling meaning across modalities and provide quantitative evidence of audio description as a bridge between visual and verbal communication. The dataset and annotation strategies are a valuable resource for research on multimodal representation, semantic similarity, and accessible media.
%U https://aclanthology.org/2025.isa-1.10/
Markdown (Informal)
[Audition: A Frame-Annotated Multimodal Dataset for Accessible Audiovisual Content](https://aclanthology.org/2025.isa-1.10/) (Gamonal et al., ISA 2025)
ACL
- Maucha Andrade Gamonal, Tiago Timponi Torrent, Ely Edison Matos, Adriana S. Pagano, Frederico Belcavello, Flávia Affonso Mayer, Arthur Lorenzi, Natalia S. Sigiliano, Helen de Andrade Abreu, Lívia Vicente Dutra, Marcelo Viridiano, André Coneglian, Victor A. S. Herbst, Franciany O. Campos, Kenneth Brown, Lívia Padua Ruiz, Lisandra Carvalho Bonoto, Luiz Fernando Pereira, and Yulla Liquer Navarro. 2025. Audition: A Frame-Annotated Multimodal Dataset for Accessible Audiovisual Content. In Proceedings of the 21st Joint ACL - ISO Workshop on Interoperable Semantic Annotation (ISA-21), Düsseldorf, Germany. Association for Computational Linguistics.