@inproceedings{langenhorst-etal-2023-keyness,
title = "Keyness in song lyrics: Challenges of highly clumpy data",
author = "Langenhorst, Jan and
Frommherz, Yannick and
Meier-Vieracker, Simon",
editor = "Schneider, Roman and
Faa{\ss}, Gertrud",
booktitle = "Journal for Language Technology and Computational Linguistics, Vol. 36 No. 1",
month = may,
year = "2023",
address = "unknown",
publisher = "German Society for Computational Linguistics and Language Technology",
url = "https://aclanthology.org/2023.jlcl-1.3",
doi = "10.21248/jlcl.36.2023.236",
pages = "21--38",
abstract = "Computer-assisted stylistic analyses regularly employ the calculation of keywords. We show that the inclusion of a separate dispersion measure in addition to a frequency measure into keyword analysis (or more generally: keyness analysis), as proposed by Gries (2021), is a necessary extension of said analyses. Using texts from the German Songkorpus, we demonstrate that traditional keyword calculations using only frequency measures lead to spurious results. Determining keywords by both measuring a word{'}s frequency and its dispersion in comparison to a reference corpus gives a more realistic view. This is especially relevant for our corpus, since song lyrics turn out to be extraordinarily clumpy data: Words that are very frequent in one artist{'}s subcorpus typically only occur in a few or even just a single one of their songs due to widespread word repetition within songs, e.g., in choruses. Song lyrics in our dataset are shown to not feature words that can be considered key at all. Our contribution is twofold: (1) We demonstrate the utility of Gries{'} (2021) approach and (2) interpret the (lack of) results in terms of a genre-specific property which is that song lyrics are lexically autonomous works of art.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="langenhorst-etal-2023-keyness">
<titleInfo>
<title>Keyness in song lyrics: Challenges of highly clumpy data</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Langenhorst</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yannick</namePart>
<namePart type="family">Frommherz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Meier-Vieracker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Journal for Language Technology and Computational Linguistics, Vol. 36 No. 1</title>
</titleInfo>
<name type="personal">
<namePart type="given">Roman</namePart>
<namePart type="family">Schneider</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gertrud</namePart>
<namePart type="family">Faaß</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>German Society for Computational Linguistics and Language Technology</publisher>
<place>
<placeTerm type="text">unknown</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Computer-assisted stylistic analyses regularly employ the calculation of keywords. We show that the inclusion of a separate dispersion measure in addition to a frequency measure into keyword analysis (or more generally: keyness analysis), as proposed by Gries (2021), is a necessary extension of said analyses. Using texts from the German Songkorpus, we demonstrate that traditional keyword calculations using only frequency measures lead to spurious results. Determining keywords by both measuring a word’s frequency and its dispersion in comparison to a reference corpus gives a more realistic view. This is especially relevant for our corpus, since song lyrics turn out to be extraordinarily clumpy data: Words that are very frequent in one artist’s subcorpus typically only occur in a few or even just a single one of their songs due to widespread word repetition within songs, e.g., in choruses. Song lyrics in our dataset are shown to not feature words that can be considered key at all. Our contribution is twofold: (1) We demonstrate the utility of Gries’ (2021) approach and (2) interpret the (lack of) results in terms of a genre-specific property which is that song lyrics are lexically autonomous works of art.</abstract>
<identifier type="citekey">langenhorst-etal-2023-keyness</identifier>
<identifier type="doi">10.21248/jlcl.36.2023.236</identifier>
<location>
<url>https://aclanthology.org/2023.jlcl-1.3</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>21</start>
<end>38</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Keyness in song lyrics: Challenges of highly clumpy data
%A Langenhorst, Jan
%A Frommherz, Yannick
%A Meier-Vieracker, Simon
%Y Schneider, Roman
%Y Faaß, Gertrud
%S Journal for Language Technology and Computational Linguistics, Vol. 36 No. 1
%D 2023
%8 May
%I German Society for Computational Linguistics and Language Technology
%C unknown
%F langenhorst-etal-2023-keyness
%X Computer-assisted stylistic analyses regularly employ the calculation of keywords. We show that the inclusion of a separate dispersion measure in addition to a frequency measure into keyword analysis (or more generally: keyness analysis), as proposed by Gries (2021), is a necessary extension of said analyses. Using texts from the German Songkorpus, we demonstrate that traditional keyword calculations using only frequency measures lead to spurious results. Determining keywords by both measuring a word’s frequency and its dispersion in comparison to a reference corpus gives a more realistic view. This is especially relevant for our corpus, since song lyrics turn out to be extraordinarily clumpy data: Words that are very frequent in one artist’s subcorpus typically only occur in a few or even just a single one of their songs due to widespread word repetition within songs, e.g., in choruses. Song lyrics in our dataset are shown to not feature words that can be considered key at all. Our contribution is twofold: (1) We demonstrate the utility of Gries’ (2021) approach and (2) interpret the (lack of) results in terms of a genre-specific property which is that song lyrics are lexically autonomous works of art.
%R 10.21248/jlcl.36.2023.236
%U https://aclanthology.org/2023.jlcl-1.3
%U https://doi.org/10.21248/jlcl.36.2023.236
%P 21-38
Markdown (Informal)
[Keyness in song lyrics: Challenges of highly clumpy data](https://aclanthology.org/2023.jlcl-1.3) (Langenhorst et al., JLCL 2023)
ACL
- Jan Langenhorst, Yannick Frommherz, and Simon Meier-Vieracker. 2023. Keyness in song lyrics: Challenges of highly clumpy data. In Journal for Language Technology and Computational Linguistics, Vol. 36 No. 1, pages 21–38, unknown. German Society for Computational Linguistics and Language Technology.