@inproceedings{gabbolini-etal-2022-data,
title = "Data-Efficient Playlist Captioning With Musical and Linguistic Knowledge",
author = "Gabbolini, Giovanni and
Hennequin, Romain and
Epure, Elena",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.emnlp-main.784",
doi = "10.18653/v1/2022.emnlp-main.784",
pages = "11401--11415",
abstract = "Music streaming services feature billions of playlists created by users, professional editors or algorithms. In this content overload scenario, it is crucial to characterise playlists, so that music can be effectively organised and accessed. Playlist titles and descriptions are proposed in natural language either manually by music editors and users or automatically from pre-defined templates. However, the former is time-consuming while the latter is limited by the vocabulary and covered music themes. In this work, we propose PlayNTell, a data-efficient multi-modal encoder-decoder model for automatic playlist captioning. Compared to existing music captioning algorithms, PlayNTell leverages also linguistic and musical knowledge to generate correct and thematic captions. We benchmark PlayNTell on a new editorial playlists dataset collected from two major music streaming services. PlayNTell yields 2x-3x higher BLEU@4 and CIDEr than state of the art captioning algorithms.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gabbolini-etal-2022-data">
<titleInfo>
<title>Data-Efficient Playlist Captioning With Musical and Linguistic Knowledge</title>
</titleInfo>
<name type="personal">
<namePart type="given">Giovanni</namePart>
<namePart type="family">Gabbolini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Romain</namePart>
<namePart type="family">Hennequin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elena</namePart>
<namePart type="family">Epure</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Music streaming services feature billions of playlists created by users, professional editors or algorithms. In this content overload scenario, it is crucial to characterise playlists, so that music can be effectively organised and accessed. Playlist titles and descriptions are proposed in natural language either manually by music editors and users or automatically from pre-defined templates. However, the former is time-consuming while the latter is limited by the vocabulary and covered music themes. In this work, we propose PlayNTell, a data-efficient multi-modal encoder-decoder model for automatic playlist captioning. Compared to existing music captioning algorithms, PlayNTell leverages also linguistic and musical knowledge to generate correct and thematic captions. We benchmark PlayNTell on a new editorial playlists dataset collected from two major music streaming services. PlayNTell yields 2x-3x higher BLEU@4 and CIDEr than state of the art captioning algorithms.</abstract>
<identifier type="citekey">gabbolini-etal-2022-data</identifier>
<identifier type="doi">10.18653/v1/2022.emnlp-main.784</identifier>
<location>
<url>https://aclanthology.org/2022.emnlp-main.784</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>11401</start>
<end>11415</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Data-Efficient Playlist Captioning With Musical and Linguistic Knowledge
%A Gabbolini, Giovanni
%A Hennequin, Romain
%A Epure, Elena
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F gabbolini-etal-2022-data
%X Music streaming services feature billions of playlists created by users, professional editors or algorithms. In this content overload scenario, it is crucial to characterise playlists, so that music can be effectively organised and accessed. Playlist titles and descriptions are proposed in natural language either manually by music editors and users or automatically from pre-defined templates. However, the former is time-consuming while the latter is limited by the vocabulary and covered music themes. In this work, we propose PlayNTell, a data-efficient multi-modal encoder-decoder model for automatic playlist captioning. Compared to existing music captioning algorithms, PlayNTell leverages also linguistic and musical knowledge to generate correct and thematic captions. We benchmark PlayNTell on a new editorial playlists dataset collected from two major music streaming services. PlayNTell yields 2x-3x higher BLEU@4 and CIDEr than state of the art captioning algorithms.
%R 10.18653/v1/2022.emnlp-main.784
%U https://aclanthology.org/2022.emnlp-main.784
%U https://doi.org/10.18653/v1/2022.emnlp-main.784
%P 11401-11415
Markdown (Informal)
[Data-Efficient Playlist Captioning With Musical and Linguistic Knowledge](https://aclanthology.org/2022.emnlp-main.784) (Gabbolini et al., EMNLP 2022)
ACL