@inproceedings{yerukola-etal-2023-dont,
title = "Don{'}t Take This Out of Context!: On the Need for Contextual Models and Evaluations for Stylistic Rewriting",
author = "Yerukola, Akhila and
Zhou, Xuhui and
Clark, Elizabeth and
Sap, Maarten",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.emnlp-main.701",
doi = "10.18653/v1/2023.emnlp-main.701",
pages = "11419--11444",
abstract = "Most existing stylistic text rewriting methods and evaluation metrics operate on a sentence level, but ignoring the broader context of the text can lead to preferring generic, ambiguous, and incoherent rewrites. In this paper, we investigate integrating the preceding textual context into both the $\textit{rewriting}$ and $\textit{evaluation}$ stages of stylistic text rewriting, and introduce a new composite contextual evaluation metric $\texttt{CtxSimFit}$ that combines similarity to the original sentence with contextual cohesiveness. We comparatively evaluate non-contextual and contextual rewrites in formality, toxicity, and sentiment transfer tasks. Our experiments show that humans significantly prefer contextual rewrites as more fitting and natural over non-contextual ones, yet existing sentence-level automatic metrics (e.g., ROUGE, SBERT) correlate poorly with human preferences ($\rho$=0{--}0.3). In contrast, human preferences are much better reflected by both our novel $\texttt{CtxSimFit}$ ($\rho$=0.7{--}0.9) as well as proposed context-infused versions of common metrics ($\rho$=0.4{--}0.7). Overall, our findings highlight the importance of integrating context into the generation and especially the evaluation stages of stylistic text rewriting.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="yerukola-etal-2023-dont">
    <titleInfo>
      <title>Don’t Take This Out of Context!: On the Need for Contextual Models and Evaluations for Stylistic Rewriting</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Akhila</namePart>
      <namePart type="family">Yerukola</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xuhui</namePart>
      <namePart type="family">Zhou</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Elizabeth</namePart>
      <namePart type="family">Clark</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Maarten</namePart>
      <namePart type="family">Sap</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Houda</namePart>
        <namePart type="family">Bouamor</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Juan</namePart>
        <namePart type="family">Pino</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kalika</namePart>
        <namePart type="family">Bali</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Singapore</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Most existing stylistic text rewriting methods and evaluation metrics operate on a sentence level, but ignoring the broader context of the text can lead to preferring generic, ambiguous, and incoherent rewrites. In this paper, we investigate integrating the preceding textual context into both the rewriting and evaluation stages of stylistic text rewriting, and introduce a new composite contextual evaluation metric CtxSimFit that combines similarity to the original sentence with contextual cohesiveness. We comparatively evaluate non-contextual and contextual rewrites in formality, toxicity, and sentiment transfer tasks. Our experiments show that humans significantly prefer contextual rewrites as more fitting and natural over non-contextual ones, yet existing sentence-level automatic metrics (e.g., ROUGE, SBERT) correlate poorly with human preferences (ρ=0–0.3). In contrast, human preferences are much better reflected by both our novel CtxSimFit (ρ=0.7–0.9) as well as proposed context-infused versions of common metrics (ρ=0.4–0.7). Overall, our findings highlight the importance of integrating context into the generation and especially the evaluation stages of stylistic text rewriting.</abstract>
    <identifier type="citekey">yerukola-etal-2023-dont</identifier>
    <identifier type="doi">10.18653/v1/2023.emnlp-main.701</identifier>
    <location>
      <url>https://aclanthology.org/2023.emnlp-main.701</url>
    </location>
    <part>
      <date>2023-12</date>
      <extent unit="page">
        <start>11419</start>
        <end>11444</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Don’t Take This Out of Context!: On the Need for Contextual Models and Evaluations for Stylistic Rewriting
%A Yerukola, Akhila
%A Zhou, Xuhui
%A Clark, Elizabeth
%A Sap, Maarten
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F yerukola-etal-2023-dont
%X Most existing stylistic text rewriting methods and evaluation metrics operate on a sentence level, but ignoring the broader context of the text can lead to preferring generic, ambiguous, and incoherent rewrites. In this paper, we investigate integrating the preceding textual context into both the rewriting and evaluation stages of stylistic text rewriting, and introduce a new composite contextual evaluation metric CtxSimFit that combines similarity to the original sentence with contextual cohesiveness. We comparatively evaluate non-contextual and contextual rewrites in formality, toxicity, and sentiment transfer tasks. Our experiments show that humans significantly prefer contextual rewrites as more fitting and natural over non-contextual ones, yet existing sentence-level automatic metrics (e.g., ROUGE, SBERT) correlate poorly with human preferences (ρ=0–0.3). In contrast, human preferences are much better reflected by both our novel CtxSimFit (ρ=0.7–0.9) as well as proposed context-infused versions of common metrics (ρ=0.4–0.7). Overall, our findings highlight the importance of integrating context into the generation and especially the evaluation stages of stylistic text rewriting.
%R 10.18653/v1/2023.emnlp-main.701
%U https://aclanthology.org/2023.emnlp-main.701
%U https://doi.org/10.18653/v1/2023.emnlp-main.701
%P 11419-11444
Markdown (Informal)
[Don’t Take This Out of Context!: On the Need for Contextual Models and Evaluations for Stylistic Rewriting](https://aclanthology.org/2023.emnlp-main.701) (Yerukola et al., EMNLP 2023)
ACL
Akhila Yerukola, Xuhui Zhou, Elizabeth Clark, and Maarten Sap. 2023. Don’t Take This Out of Context!: On the Need for Contextual Models and Evaluations for Stylistic Rewriting. In Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, pages 11419–11444, Singapore. Association for Computational Linguistics.
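
The abstract describes CtxSimFit as a composite metric combining similarity to the original sentence with contextual cohesiveness. As a loose illustration only, and not the authors' implementation, the minimal sketch below combines an SBERT-style similarity term with a crude context-fit proxy under an arbitrary weight `alpha`; the model name, the use of context–rewrite cosine similarity as the cohesiveness term, and the weighting are all assumptions made for demonstration. The actual metric is defined in the paper linked above.

```python
# Illustrative sketch of a CtxSimFit-STYLE composite score (not the paper's method).
# Assumptions: SBERT cosine similarity stands in for "similarity to the original
# sentence", and context-rewrite cosine similarity is a crude proxy for
# "contextual cohesiveness"; alpha is an arbitrary mixing weight.
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")  # hypothetical model choice

def composite_contextual_score(context: str, original: str, rewrite: str,
                               alpha: float = 0.5) -> float:
    """Weighted blend of meaning preservation and a proxy for contextual fit."""
    ctx_emb, orig_emb, rw_emb = model.encode([context, original, rewrite])
    similarity = util.cos_sim(orig_emb, rw_emb).item()   # closeness to the original sentence
    cohesiveness = util.cos_sim(ctx_emb, rw_emb).item()  # rough fit with the preceding context
    return alpha * similarity + (1 - alpha) * cohesiveness

# Example: a contextual rewrite should score higher on the cohesiveness term
# than a generic, context-agnostic one.
score = composite_contextual_score(
    context="We regret the delay in processing your request.",
    original="yo this is taking forever, fix it",
    rewrite="We understand the process has taken longer than expected and will resolve it promptly.",
)
print(round(score, 3))
```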