@inproceedings{vahtola-etal-2023-guiding,
title = "Guiding Zero-Shot Paraphrase Generation with Fine-Grained Control Tokens",
author = "Vahtola, Teemu and
Creutz, Mathias and
Tiedemann, Jrg",
editor = "Palmer, Alexis and
Camacho-collados, Jose",
booktitle = "Proceedings of the 12th Joint Conference on Lexical and Computational Semantics (*SEM 2023)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.starsem-1.29",
doi = "10.18653/v1/2023.starsem-1.29",
pages = "323--337",
abstract = "Sequence-to-sequence paraphrase generation models often struggle with the generation of diverse paraphrases. This deficiency constrains the viability of leveraging paraphrase generation in different Natural Language Processing tasks. We propose a translation-based guided paraphrase generation model that learns useful features for promoting surface form variation in generated paraphrases from cross-lingual parallel data. Our proposed method leverages multilingual neural machine translation pretraining to learn zero-shot paraphrasing. Furthermore, we incorporate dedicated prefix tokens into the training of the machine translation models to promote variation. The prefix tokens are designed to affect various linguistic features related to surface form realizations, and can be applied during inference to guide the decoding process towards a desired solution. We assess the proposed guided model on paraphrase generation in three languages, English, Finnish, and Swedish, and provide analysis on the feasibility of the prefix tokens to guided paraphrasing. Our analysis suggests that the attributes represented by the prefix tokens are useful in promoting variation, by pushing the paraphrases generated by the guided model to diverge from the input sentence while preserving semantics conveyed by the sentence well.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="vahtola-etal-2023-guiding">
<titleInfo>
<title>Guiding Zero-Shot Paraphrase Generation with Fine-Grained Control Tokens</title>
</titleInfo>
<name type="personal">
<namePart type="given">Teemu</namePart>
<namePart type="family">Vahtola</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mathias</namePart>
<namePart type="family">Creutz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jrg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th Joint Conference on Lexical and Computational Semantics (*SEM 2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alexis</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jose</namePart>
<namePart type="family">Camacho-collados</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Sequence-to-sequence paraphrase generation models often struggle with the generation of diverse paraphrases. This deficiency constrains the viability of leveraging paraphrase generation in different Natural Language Processing tasks. We propose a translation-based guided paraphrase generation model that learns useful features for promoting surface form variation in generated paraphrases from cross-lingual parallel data. Our proposed method leverages multilingual neural machine translation pretraining to learn zero-shot paraphrasing. Furthermore, we incorporate dedicated prefix tokens into the training of the machine translation models to promote variation. The prefix tokens are designed to affect various linguistic features related to surface form realizations, and can be applied during inference to guide the decoding process towards a desired solution. We assess the proposed guided model on paraphrase generation in three languages, English, Finnish, and Swedish, and provide analysis on the feasibility of the prefix tokens to guided paraphrasing. Our analysis suggests that the attributes represented by the prefix tokens are useful in promoting variation, by pushing the paraphrases generated by the guided model to diverge from the input sentence while preserving semantics conveyed by the sentence well.</abstract>
<identifier type="citekey">vahtola-etal-2023-guiding</identifier>
<identifier type="doi">10.18653/v1/2023.starsem-1.29</identifier>
<location>
<url>https://aclanthology.org/2023.starsem-1.29</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>323</start>
<end>337</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Guiding Zero-Shot Paraphrase Generation with Fine-Grained Control Tokens
%A Vahtola, Teemu
%A Creutz, Mathias
%A Tiedemann, Jrg
%Y Palmer, Alexis
%Y Camacho-collados, Jose
%S Proceedings of the 12th Joint Conference on Lexical and Computational Semantics (*SEM 2023)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F vahtola-etal-2023-guiding
%X Sequence-to-sequence paraphrase generation models often struggle with the generation of diverse paraphrases. This deficiency constrains the viability of leveraging paraphrase generation in different Natural Language Processing tasks. We propose a translation-based guided paraphrase generation model that learns useful features for promoting surface form variation in generated paraphrases from cross-lingual parallel data. Our proposed method leverages multilingual neural machine translation pretraining to learn zero-shot paraphrasing. Furthermore, we incorporate dedicated prefix tokens into the training of the machine translation models to promote variation. The prefix tokens are designed to affect various linguistic features related to surface form realizations, and can be applied during inference to guide the decoding process towards a desired solution. We assess the proposed guided model on paraphrase generation in three languages, English, Finnish, and Swedish, and provide analysis on the feasibility of the prefix tokens to guided paraphrasing. Our analysis suggests that the attributes represented by the prefix tokens are useful in promoting variation, by pushing the paraphrases generated by the guided model to diverge from the input sentence while preserving semantics conveyed by the sentence well.
%R 10.18653/v1/2023.starsem-1.29
%U https://aclanthology.org/2023.starsem-1.29
%U https://doi.org/10.18653/v1/2023.starsem-1.29
%P 323-337
Markdown (Informal)
[Guiding Zero-Shot Paraphrase Generation with Fine-Grained Control Tokens](https://aclanthology.org/2023.starsem-1.29) (Vahtola et al., *SEM 2023)
ACL