@inproceedings{pang-2019-towards,
title = "Towards Actual (Not Operational) Textual Style Transfer Auto-Evaluation",
author = "Pang, Richard Yuanzhe",
editor = "Xu, Wei and
Ritter, Alan and
Baldwin, Tim and
Rahimi, Afshin",
booktitle = "Proceedings of the 5th Workshop on Noisy User-generated Text (W-NUT 2019)",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D19-5557",
doi = "10.18653/v1/D19-5557",
pages = "444--445",
abstract = "Regarding the problem of automatically generating paraphrases with modified styles or attributes, the difficulty lies in the lack of parallel corpora. Numerous advances have been proposed for the generation. However, significant problems remain with the auto-evaluation of style transfer tasks. Based on the summary of Pang and Gimpel (2018) and Mir et al. (2019), style transfer evaluations rely on three metrics: post-transfer style classification accuracy, content or semantic similarity, and naturalness or fluency. We elucidate the dangerous current state of style transfer auto-evaluation research. Moreover, we propose ways to aggregate the three metrics into one evaluator. This abstract aims to bring researchers to think about the future of style transfer and style transfer evaluation research.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pang-2019-towards">
<titleInfo>
<title>Towards Actual (Not Operational) Textual Style Transfer Auto-Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="given">Yuanzhe</namePart>
<namePart type="family">Pang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 5th Workshop on Noisy User-generated Text (W-NUT 2019)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Ritter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tim</namePart>
<namePart type="family">Baldwin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Afshin</namePart>
<namePart type="family">Rahimi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Regarding the problem of automatically generating paraphrases with modified styles or attributes, the difficulty lies in the lack of parallel corpora. Numerous advances have been proposed for the generation. However, significant problems remain with the auto-evaluation of style transfer tasks. Based on the summary of Pang and Gimpel (2018) and Mir et al. (2019), style transfer evaluations rely on three metrics: post-transfer style classification accuracy, content or semantic similarity, and naturalness or fluency. We elucidate the dangerous current state of style transfer auto-evaluation research. Moreover, we propose ways to aggregate the three metrics into one evaluator. This abstract aims to bring researchers to think about the future of style transfer and style transfer evaluation research.</abstract>
<identifier type="citekey">pang-2019-towards</identifier>
<identifier type="doi">10.18653/v1/D19-5557</identifier>
<location>
<url>https://aclanthology.org/D19-5557</url>
</location>
<part>
<date>2019-11</date>
<extent unit="page">
<start>444</start>
<end>445</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Actual (Not Operational) Textual Style Transfer Auto-Evaluation
%A Pang, Richard Yuanzhe
%Y Xu, Wei
%Y Ritter, Alan
%Y Baldwin, Tim
%Y Rahimi, Afshin
%S Proceedings of the 5th Workshop on Noisy User-generated Text (W-NUT 2019)
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong, China
%F pang-2019-towards
%X Regarding the problem of automatically generating paraphrases with modified styles or attributes, the difficulty lies in the lack of parallel corpora. Numerous advances have been proposed for the generation. However, significant problems remain with the auto-evaluation of style transfer tasks. Based on the summary of Pang and Gimpel (2018) and Mir et al. (2019), style transfer evaluations rely on three metrics: post-transfer style classification accuracy, content or semantic similarity, and naturalness or fluency. We elucidate the dangerous current state of style transfer auto-evaluation research. Moreover, we propose ways to aggregate the three metrics into one evaluator. This abstract aims to bring researchers to think about the future of style transfer and style transfer evaluation research.
%R 10.18653/v1/D19-5557
%U https://aclanthology.org/D19-5557
%U https://doi.org/10.18653/v1/D19-5557
%P 444-445
Markdown (Informal)
[Towards Actual (Not Operational) Textual Style Transfer Auto-Evaluation](https://aclanthology.org/D19-5557) (Pang, WNUT 2019)
ACL