@inproceedings{popescu-belis-etal-2023-gpoet,
title = "{GP}oe{T}: a Language Model Trained for Rhyme Generation on Synthetic Data",
author = "Popescu-Belis, Andrei and
Atrio, {\`A}lex R. and
Bernath, Bastien and
Boisson, Etienne and
Ferrari, Teo and
Theimer-Lienhard, Xavier and
Vernikos, Giorgos",
editor = "Degaetano-Ortlieb, Stefania and
Kazantseva, Anna and
Reiter, Nils and
Szpakowicz, Stan",
booktitle = "Proceedings of the 7th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.latechclfl-1.2",
doi = "10.18653/v1/2023.latechclfl-1.2",
pages = "10--20",
abstract = "Poem generation with language models requires the modeling of rhyming patterns. We propose a novel solution for learning to rhyme, based on synthetic data generated with a rule-based rhyming algorithm. The algorithm and an evaluation metric use a phonetic dictionary and the definitions of perfect and assonant rhymes. We fine-tune a GPT-2 English model with 124M parameters on 142 MB of natural poems and find that this model generates consecutive rhymes infrequently (11{\%}). We then fine-tune the model on 6 MB of synthetic quatrains with consecutive rhymes (AABB) and obtain nearly 60{\%} of rhyming lines in samples generated by the model. Alternating rhymes (ABAB) are more difficult to model because of longer-range dependencies, but they are still learnable from synthetic data, reaching 45{\%} of rhyming lines in generated samples.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="popescu-belis-etal-2023-gpoet">
<titleInfo>
<title>GPoeT: a Language Model Trained for Rhyme Generation on Synthetic Data</title>
</titleInfo>
<name type="personal">
<namePart type="given">Andrei</namePart>
<namePart type="family">Popescu-Belis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Àlex</namePart>
<namePart type="given">R</namePart>
<namePart type="family">Atrio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bastien</namePart>
<namePart type="family">Bernath</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Etienne</namePart>
<namePart type="family">Boisson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Teo</namePart>
<namePart type="family">Ferrari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xavier</namePart>
<namePart type="family">Theimer-Lienhard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giorgos</namePart>
<namePart type="family">Vernikos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 7th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature</title>
</titleInfo>
<name type="personal">
<namePart type="given">Stefania</namePart>
<namePart type="family">Degaetano-Ortlieb</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Kazantseva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nils</namePart>
<namePart type="family">Reiter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stan</namePart>
<namePart type="family">Szpakowicz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Poem generation with language models requires the modeling of rhyming patterns. We propose a novel solution for learning to rhyme, based on synthetic data generated with a rule-based rhyming algorithm. The algorithm and an evaluation metric use a phonetic dictionary and the definitions of perfect and assonant rhymes. We fine-tune a GPT-2 English model with 124M parameters on 142 MB of natural poems and find that this model generates consecutive rhymes infrequently (11%). We then fine-tune the model on 6 MB of synthetic quatrains with consecutive rhymes (AABB) and obtain nearly 60% of rhyming lines in samples generated by the model. Alternating rhymes (ABAB) are more difficult to model because of longer-range dependencies, but they are still learnable from synthetic data, reaching 45% of rhyming lines in generated samples.</abstract>
<identifier type="citekey">popescu-belis-etal-2023-gpoet</identifier>
<identifier type="doi">10.18653/v1/2023.latechclfl-1.2</identifier>
<location>
<url>https://aclanthology.org/2023.latechclfl-1.2</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>10</start>
<end>20</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T GPoeT: a Language Model Trained for Rhyme Generation on Synthetic Data
%A Popescu-Belis, Andrei
%A Atrio, Àlex R.
%A Bernath, Bastien
%A Boisson, Etienne
%A Ferrari, Teo
%A Theimer-Lienhard, Xavier
%A Vernikos, Giorgos
%Y Degaetano-Ortlieb, Stefania
%Y Kazantseva, Anna
%Y Reiter, Nils
%Y Szpakowicz, Stan
%S Proceedings of the 7th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F popescu-belis-etal-2023-gpoet
%X Poem generation with language models requires the modeling of rhyming patterns. We propose a novel solution for learning to rhyme, based on synthetic data generated with a rule-based rhyming algorithm. The algorithm and an evaluation metric use a phonetic dictionary and the definitions of perfect and assonant rhymes. We fine-tune a GPT-2 English model with 124M parameters on 142 MB of natural poems and find that this model generates consecutive rhymes infrequently (11%). We then fine-tune the model on 6 MB of synthetic quatrains with consecutive rhymes (AABB) and obtain nearly 60% of rhyming lines in samples generated by the model. Alternating rhymes (ABAB) are more difficult to model because of longer-range dependencies, but they are still learnable from synthetic data, reaching 45% of rhyming lines in generated samples.
%R 10.18653/v1/2023.latechclfl-1.2
%U https://aclanthology.org/2023.latechclfl-1.2
%U https://doi.org/10.18653/v1/2023.latechclfl-1.2
%P 10-20
Markdown (Informal)
[GPoeT: a Language Model Trained for Rhyme Generation on Synthetic Data](https://aclanthology.org/2023.latechclfl-1.2) (Popescu-Belis et al., LaTeCHCLfL 2023)
ACL
- Andrei Popescu-Belis, Àlex R. Atrio, Bastien Bernath, Etienne Boisson, Teo Ferrari, Xavier Theimer-Lienhard, and Giorgos Vernikos. 2023. GPoeT: a Language Model Trained for Rhyme Generation on Synthetic Data. In Proceedings of the 7th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature, pages 10–20, Dubrovnik, Croatia. Association for Computational Linguistics.