% Conference paper in the RANLP 2021 proceedings; the url field points to the ACL Anthology record.
% Case-sensitive tokens (the BPoMP name, the RANLP acronym) are protected as whole braced words
% so that sentence-casing bibliography styles leave them intact.
@inproceedings{abdibayev-etal-2021-bpomp,
    title     = {{BPoMP}: The Benchmark of Poetic Minimal Pairs {--} Limericks, Rhyme, and Narrative Coherence},
    author    = {Abdibayev, Almas and
                 Riddell, Allen and
                 Rockmore, Daniel},
    editor    = {Mitkov, Ruslan and
                 Angelova, Galia},
    booktitle = {Proceedings of the International Conference on Recent Advances in Natural Language Processing ({RANLP} 2021)},
    month     = sep,
    year      = {2021},
    address   = {Held Online},
    publisher = {INCOMA Ltd.},
    url       = {https://aclanthology.org/2021.ranlp-1.1},
    pages     = {1--9},
    abstract  = {We adapt BLiMP (Benchmark of Linguistic Minimal Pairs) language model evaluation framework to the context of poetry, introducing the first of a series of tasks titled Benchmark of Poetic Minimal Pairs (BPoMP). The tasks presented herein use one genre of English-language poetry, the limerick (five-lines, rhyme scheme AABBA). Following the BLiMP schema, the BPoMP tasks use 10,000 minimal pairs of limerick/corrupted limerick. The latter is created by (1) shuffling two rhyming end-of-the-line words, (2) shuffling two rhyming lines, (3) replacing end-of-the-line word by a non-rhyming synonym. Our general task is detection of the original limerick, which we believe tests a language model{'}s capacity to utilize {``}end rhymes{''}, a common feature of poetry. We evaluate Transformer-based models by checking if they assign a higher probability to the non-corrupted limerick in each minimal pair. We find that the models identify the original limerick at rates better than chance, but with a nontrivial gap relative to human accuracy (average of 98.3{\%} across tasks). The publicly available curated set of limericks accompanying this paper is an additional contribution. In general, we see this as a first step to create a community of NLP activity around the rigorous computational study of poetry.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding one record, citekey abdibayev-etal-2021-bpomp. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="abdibayev-etal-2021-bpomp">
    <titleInfo>
      <title>BPoMP: The Benchmark of Poetic Minimal Pairs – Limericks, Rhyme, and Narrative Coherence</title>
    </titleInfo>
    <!-- Authors, in the order given. -->
    <name type="personal">
      <namePart type="given">Almas</namePart>
      <namePart type="family">Abdibayev</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Allen</namePart>
      <namePart type="family">Riddell</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Daniel</namePart>
      <namePart type="family">Rockmore</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Galia</namePart>
        <namePart type="family">Angelova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd.</publisher>
        <place>
          <placeTerm type="text">Held Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We adapt BLiMP (Benchmark of Linguistic Minimal Pairs) language model evaluation framework to the context of poetry, introducing the first of a series of tasks titled Benchmark of Poetic Minimal Pairs (BPoMP). The tasks presented herein use one genre of English-language poetry, the limerick (five-lines, rhyme scheme AABBA). Following the BLiMP schema, the BPoMP tasks use 10,000 minimal pairs of limerick/corrupted limerick. The latter is created by (1) shuffling two rhyming end-of-the-line words, (2) shuffling two rhyming lines, (3) replacing end-of-the-line word by a non-rhyming synonym. Our general task is detection of the original limerick, which we believe tests a language model’s capacity to utilize “end rhymes”, a common feature of poetry. We evaluate Transformer-based models by checking if they assign a higher probability to the non-corrupted limerick in each minimal pair. We find that the models identify the original limerick at rates better than chance, but with a nontrivial gap relative to human accuracy (average of 98.3% across tasks). The publicly available curated set of limericks accompanying this paper is an additional contribution. In general, we see this as a first step to create a community of NLP activity around the rigorous computational study of poetry.</abstract>
    <identifier type="citekey">abdibayev-etal-2021-bpomp</identifier>
    <location>
      <url>https://aclanthology.org/2021.ranlp-1.1</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2021-09</date>
      <extent unit="page">
        <start>1</start>
        <end>9</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T BPoMP: The Benchmark of Poetic Minimal Pairs – Limericks, Rhyme, and Narrative Coherence
%A Abdibayev, Almas
%A Riddell, Allen
%A Rockmore, Daniel
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)
%D 2021
%8 September
%I INCOMA Ltd.
%C Held Online
%F abdibayev-etal-2021-bpomp
%X We adapt BLiMP (Benchmark of Linguistic Minimal Pairs) language model evaluation framework to the context of poetry, introducing the first of a series of tasks titled Benchmark of Poetic Minimal Pairs (BPoMP). The tasks presented herein use one genre of English-language poetry, the limerick (five-lines, rhyme scheme AABBA). Following the BLiMP schema, the BPoMP tasks use 10,000 minimal pairs of limerick/corrupted limerick. The latter is created by (1) shuffling two rhyming end-of-the-line words, (2) shuffling two rhyming lines, (3) replacing end-of-the-line word by a non-rhyming synonym. Our general task is detection of the original limerick, which we believe tests a language model’s capacity to utilize “end rhymes”, a common feature of poetry. We evaluate Transformer-based models by checking if they assign a higher probability to the non-corrupted limerick in each minimal pair. We find that the models identify the original limerick at rates better than chance, but with a nontrivial gap relative to human accuracy (average of 98.3% across tasks). The publicly available curated set of limericks accompanying this paper is an additional contribution. In general, we see this as a first step to create a community of NLP activity around the rigorous computational study of poetry.
%U https://aclanthology.org/2021.ranlp-1.1
%P 1-9
Markdown (Informal)
[BPoMP: The Benchmark of Poetic Minimal Pairs – Limericks, Rhyme, and Narrative Coherence](https://aclanthology.org/2021.ranlp-1.1) (Abdibayev et al., RANLP 2021)
ACL