@inproceedings{marinelli-etal-2025-leveraging,
title = "Leveraging {RAG} for a Low-Resource Audio-Aware Diachronic Analysis of Gendered Toy Marketing",
author = "Marinelli, Luca and
Ghinassi, Iacopo and
Saitis, Charalampos",
editor = "Arachchige, Isuri Nanomi and
Frontini, Francesca and
Mitkov, Ruslan and
Rayson, Paul",
booktitle = "Proceedings of the First Workshop on Natural Language Processing and Language Models for Digital Humanities",
month = sep,
year = "2025",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2025.lm4dh-1.9/",
pages = "102--111",
abstract = "We performed a diachronic analysis of sound and language in toy commercials, leveraging retrieval-augmented generation (RAG) and open-weight language models in low-resource settings. A pool of 2508 UK toy advertisements spanning 14 years was semi-automatically annotated, integrating thematic coding of transcripts with audio annotation. With our RAG pipeline, we thematically coded and classified commercials by gender-target audience (feminine, masculine, or mixed) achieving substantial inter-coder reliability. In parallel, a music-focused multitask model was applied to annotate affective and mid-level musical perceptual attributes, enabling multimodal discourse analysis. Our findings reveal significant diachronic shifts and enduring patterns. Soundtracks classified as energizing registered an overall increase across distinct themes and audiences, but such increase was steeper for masculine-adjacent commercials. Moreover, themes stereotypically associated with masculinity paired more frequently with louder, distorted, and aggressive music, while stereotypically feminine themes with softer, calmer, and more harmonious soundtracks. Code and data to reproduce the results are available on github.com/marinelliluca/low-resource-RAG."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="marinelli-etal-2025-leveraging">
<titleInfo>
<title>Leveraging RAG for a Low-Resource Audio-Aware Diachronic Analysis of Gendered Toy Marketing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Luca</namePart>
<namePart type="family">Marinelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iacopo</namePart>
<namePart type="family">Ghinassi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Charalampos</namePart>
<namePart type="family">Saitis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Natural Language Processing and Language Models for Digital Humanities</title>
</titleInfo>
<name type="personal">
<namePart type="given">Isuri</namePart>
<namePart type="given">Nanomi</namePart>
<namePart type="family">Arachchige</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francesca</namePart>
<namePart type="family">Frontini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Rayson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We performed a diachronic analysis of sound and language in toy commercials, leveraging retrieval-augmented generation (RAG) and open-weight language models in low-resource settings. A pool of 2508 UK toy advertisements spanning 14 years was semi-automatically annotated, integrating thematic coding of transcripts with audio annotation. With our RAG pipeline, we thematically coded and classified commercials by gender-target audience (feminine, masculine, or mixed) achieving substantial inter-coder reliability. In parallel, a music-focused multitask model was applied to annotate affective and mid-level musical perceptual attributes, enabling multimodal discourse analysis. Our findings reveal significant diachronic shifts and enduring patterns. Soundtracks classified as energizing registered an overall increase across distinct themes and audiences, but such increase was steeper for masculine-adjacent commercials. Moreover, themes stereotypically associated with masculinity paired more frequently with louder, distorted, and aggressive music, while stereotypically feminine themes with softer, calmer, and more harmonious soundtracks. Code and data to reproduce the results are available on github.com/marinelliluca/low-resource-RAG.</abstract>
<identifier type="citekey">marinelli-etal-2025-leveraging</identifier>
<location>
<url>https://aclanthology.org/2025.lm4dh-1.9/</url>
</location>
<part>
<date>2025-09</date>
<extent unit="page">
<start>102</start>
<end>111</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Leveraging RAG for a Low-Resource Audio-Aware Diachronic Analysis of Gendered Toy Marketing
%A Marinelli, Luca
%A Ghinassi, Iacopo
%A Saitis, Charalampos
%Y Arachchige, Isuri Nanomi
%Y Frontini, Francesca
%Y Mitkov, Ruslan
%Y Rayson, Paul
%S Proceedings of the First Workshop on Natural Language Processing and Language Models for Digital Humanities
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F marinelli-etal-2025-leveraging
%X We performed a diachronic analysis of sound and language in toy commercials, leveraging retrieval-augmented generation (RAG) and open-weight language models in low-resource settings. A pool of 2508 UK toy advertisements spanning 14 years was semi-automatically annotated, integrating thematic coding of transcripts with audio annotation. With our RAG pipeline, we thematically coded and classified commercials by gender-target audience (feminine, masculine, or mixed) achieving substantial inter-coder reliability. In parallel, a music-focused multitask model was applied to annotate affective and mid-level musical perceptual attributes, enabling multimodal discourse analysis. Our findings reveal significant diachronic shifts and enduring patterns. Soundtracks classified as energizing registered an overall increase across distinct themes and audiences, but such increase was steeper for masculine-adjacent commercials. Moreover, themes stereotypically associated with masculinity paired more frequently with louder, distorted, and aggressive music, while stereotypically feminine themes with softer, calmer, and more harmonious soundtracks. Code and data to reproduce the results are available on github.com/marinelliluca/low-resource-RAG.
%U https://aclanthology.org/2025.lm4dh-1.9/
%P 102-111
Markdown (Informal)
[Leveraging RAG for a Low-Resource Audio-Aware Diachronic Analysis of Gendered Toy Marketing](https://aclanthology.org/2025.lm4dh-1.9/) (Marinelli et al., LM4DH 2025)
ACL