@inproceedings{vasilakis-etal-2024-evaluation,
title = "Evaluation of Pretrained Language Models on Music Understanding",
author = "Vasilakis, Yannis and
Bittner, Rachel and
Pauwels, Johan",
editor = "Kruspe, Anna and
Oramas, Sergio and
Epure, Elena V. and
Sordo, Mohamed and
Weck, Benno and
Doh, SeungHeon and
Won, Minz and
Manco, Ilaria and
Meseguer-Brocal, Gabriel",
booktitle = "Proceedings of the 3rd Workshop on NLP for Music and Audio (NLP4MusA)",
month = nov,
year = "2024",
address = "Oakland, USA",
publisher = "Association for Computational Lingustics",
url = "https://aclanthology.org/2024.nlp4musa-1.16/",
pages = "98--106",
abstract = "Music-text multimodal systems have enabled new approaches to Music Information Research (MIR) applications. Despite the reported success, there has been little effort in evaluating the musical knowledge of Large Language Models (LLM). We demonstrate that LLMs suffer from prompt sensitivity, inability to model negation and sensitivity towards specific words. We quantified these properties as a triplet-based accuracy, evaluating the ability to model the relative similarity of labels in a hierarchical ontology. We leveraged Audioset ontology to generate triplets consisting of anchor, positive and negative label for genre/instruments sub-tree and use six general-purpose Transformer-based models. Triplets required filtering, as some were difficult to judge and therefore relatively uninformative for evaluation purposes. Despite the relatively high accuracy reported, inconsistencies are evident in all six models, suggesting that off-the-shelf LLMs need adaptation to music before use."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="vasilakis-etal-2024-evaluation">
<titleInfo>
<title>Evaluation of Pretrained Language Models on Music Understanding</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yannis</namePart>
<namePart type="family">Vasilakis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rachel</namePart>
<namePart type="family">Bittner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Johan</namePart>
<namePart type="family">Pauwels</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 3rd Workshop on NLP for Music and Audio (NLP4MusA)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Kruspe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sergio</namePart>
<namePart type="family">Oramas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elena</namePart>
<namePart type="given">V</namePart>
<namePart type="family">Epure</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohamed</namePart>
<namePart type="family">Sordo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benno</namePart>
<namePart type="family">Weck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">SeungHeon</namePart>
<namePart type="family">Doh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Minz</namePart>
<namePart type="family">Won</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ilaria</namePart>
<namePart type="family">Manco</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gabriel</namePart>
<namePart type="family">Meseguer-Brocal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Lingustics</publisher>
<place>
<placeTerm type="text">Oakland, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Music-text multimodal systems have enabled new approaches to Music Information Research (MIR) applications. Despite the reported success, there has been little effort in evaluating the musical knowledge of Large Language Models (LLM). We demonstrate that LLMs suffer from prompt sensitivity, inability to model negation and sensitivity towards specific words. We quantified these properties as a triplet-based accuracy, evaluating the ability to model the relative similarity of labels in a hierarchical ontology. We leveraged Audioset ontology to generate triplets consisting of anchor, positive and negative label for genre/instruments sub-tree and use six general-purpose Transformer-based models. Triplets required filtering, as some were difficult to judge and therefore relatively uninformative for evaluation purposes. Despite the relatively high accuracy reported, inconsistencies are evident in all six models, suggesting that off-the-shelf LLMs need adaptation to music before use.</abstract>
<identifier type="citekey">vasilakis-etal-2024-evaluation</identifier>
<location>
<url>https://aclanthology.org/2024.nlp4musa-1.16/</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>98</start>
<end>106</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluation of Pretrained Language Models on Music Understanding
%A Vasilakis, Yannis
%A Bittner, Rachel
%A Pauwels, Johan
%Y Kruspe, Anna
%Y Oramas, Sergio
%Y Epure, Elena V.
%Y Sordo, Mohamed
%Y Weck, Benno
%Y Doh, SeungHeon
%Y Won, Minz
%Y Manco, Ilaria
%Y Meseguer-Brocal, Gabriel
%S Proceedings of the 3rd Workshop on NLP for Music and Audio (NLP4MusA)
%D 2024
%8 November
%I Association for Computational Lingustics
%C Oakland, USA
%F vasilakis-etal-2024-evaluation
%X Music-text multimodal systems have enabled new approaches to Music Information Research (MIR) applications. Despite the reported success, there has been little effort in evaluating the musical knowledge of Large Language Models (LLM). We demonstrate that LLMs suffer from prompt sensitivity, inability to model negation and sensitivity towards specific words. We quantified these properties as a triplet-based accuracy, evaluating the ability to model the relative similarity of labels in a hierarchical ontology. We leveraged Audioset ontology to generate triplets consisting of anchor, positive and negative label for genre/instruments sub-tree and use six general-purpose Transformer-based models. Triplets required filtering, as some were difficult to judge and therefore relatively uninformative for evaluation purposes. Despite the relatively high accuracy reported, inconsistencies are evident in all six models, suggesting that off-the-shelf LLMs need adaptation to music before use.
%U https://aclanthology.org/2024.nlp4musa-1.16/
%P 98-106
Markdown (Informal)
[Evaluation of Pretrained Language Models on Music Understanding](https://aclanthology.org/2024.nlp4musa-1.16/) (Vasilakis et al., NLP4MusA 2024)
ACL