@inproceedings{hachmeier-jaschke-2024-leveraging,
title = "Leveraging User-Generated Metadata of Online Videos for Cover Song Identification",
author = {Hachmeier, Simon and
J{\"a}schke, Robert},
editor = "Kruspe, Anna and
Oramas, Sergio and
Epure, Elena V. and
Sordo, Mohamed and
Weck, Benno and
Doh, SeungHeon and
Won, Minz and
Manco, Ilaria and
Meseguer-Brocal, Gabriel",
booktitle = "Proceedings of the 3rd Workshop on NLP for Music and Audio (NLP4MusA)",
month = nov,
year = "2024",
address = "Oakland, USA",
publisher = "Association for Computational Lingustics",
url = "https://aclanthology.org/2024.nlp4musa-1.8/",
pages = "43--48",
abstract = "YouTube is a rich source of cover songs. Since the platform itself is organized in terms of videos rather than songs, the retrieval of covers is not trivial. The field of cover song identification addresses this problem and provides approaches that usually rely on audio content. However, including the user-generated video metadata available on YouTube promises improved identification results. In this paper, we propose a multi-modal approach for cover song identification on online video platforms. We combine the entity resolution models with audio-based approaches using a ranking model. Our findings implicate that leveraging user-generated metadata can stabilize cover song identification performance on YouTube."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hachmeier-jaschke-2024-leveraging">
<titleInfo>
<title>Leveraging User-Generated Metadata of Online Videos for Cover Song Identification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Hachmeier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robert</namePart>
<namePart type="family">Jäschke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 3rd Workshop on NLP for Music and Audio (NLP4MusA)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Kruspe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sergio</namePart>
<namePart type="family">Oramas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elena</namePart>
<namePart type="given">V</namePart>
<namePart type="family">Epure</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohamed</namePart>
<namePart type="family">Sordo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benno</namePart>
<namePart type="family">Weck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">SeungHeon</namePart>
<namePart type="family">Doh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Minz</namePart>
<namePart type="family">Won</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ilaria</namePart>
<namePart type="family">Manco</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gabriel</namePart>
<namePart type="family">Meseguer-Brocal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Lingustics</publisher>
<place>
<placeTerm type="text">Oakland, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>YouTube is a rich source of cover songs. Since the platform itself is organized in terms of videos rather than songs, the retrieval of covers is not trivial. The field of cover song identification addresses this problem and provides approaches that usually rely on audio content. However, including the user-generated video metadata available on YouTube promises improved identification results. In this paper, we propose a multi-modal approach for cover song identification on online video platforms. We combine the entity resolution models with audio-based approaches using a ranking model. Our findings implicate that leveraging user-generated metadata can stabilize cover song identification performance on YouTube.</abstract>
<identifier type="citekey">hachmeier-jaschke-2024-leveraging</identifier>
<location>
<url>https://aclanthology.org/2024.nlp4musa-1.8/</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>43</start>
<end>48</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Leveraging User-Generated Metadata of Online Videos for Cover Song Identification
%A Hachmeier, Simon
%A Jäschke, Robert
%Y Kruspe, Anna
%Y Oramas, Sergio
%Y Epure, Elena V.
%Y Sordo, Mohamed
%Y Weck, Benno
%Y Doh, SeungHeon
%Y Won, Minz
%Y Manco, Ilaria
%Y Meseguer-Brocal, Gabriel
%S Proceedings of the 3rd Workshop on NLP for Music and Audio (NLP4MusA)
%D 2024
%8 November
%I Association for Computational Lingustics
%C Oakland, USA
%F hachmeier-jaschke-2024-leveraging
%X YouTube is a rich source of cover songs. Since the platform itself is organized in terms of videos rather than songs, the retrieval of covers is not trivial. The field of cover song identification addresses this problem and provides approaches that usually rely on audio content. However, including the user-generated video metadata available on YouTube promises improved identification results. In this paper, we propose a multi-modal approach for cover song identification on online video platforms. We combine the entity resolution models with audio-based approaches using a ranking model. Our findings implicate that leveraging user-generated metadata can stabilize cover song identification performance on YouTube.
%U https://aclanthology.org/2024.nlp4musa-1.8/
%P 43-48
Markdown (Informal)
[Leveraging User-Generated Metadata of Online Videos for Cover Song Identification](https://aclanthology.org/2024.nlp4musa-1.8/) (Hachmeier & Jäschke, NLP4MusA 2024)
ACL