@inproceedings{zhu-bhat-2020-gruen,
title = "{GRUEN} for Evaluating Linguistic Quality of Generated Text",
author = "Zhu, Wanzheng and
Bhat, Suma",
editor = "Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2020",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.findings-emnlp.9",
doi = "10.18653/v1/2020.findings-emnlp.9",
pages = "94--108",
abstract = "Automatic evaluation metrics are indispensable for evaluating generated text. To date, these metrics have focused almost exclusively on the content selection aspect of the system output, ignoring the linguistic quality aspect altogether. We bridge this gap by proposing GRUEN for evaluating Grammaticality, non-Redundancy, focUs, structure and coherENce of generated text. GRUEN utilizes a BERT-based model and a class of syntactic, semantic, and contextual features to examine the system output. Unlike most existing evaluation metrics which require human references as an input, GRUEN is reference-less and requires only the system output. Besides, it has the advantage of being unsupervised, deterministic, and adaptable to various tasks. Experiments on seven datasets over four language generation tasks show that the proposed metric correlates highly with human judgments.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhu-bhat-2020-gruen">
<titleInfo>
<title>GRUEN for Evaluating Linguistic Quality of Generated Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanzheng</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Suma</namePart>
<namePart type="family">Bhat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2020</title>
</titleInfo>
<name type="personal">
<namePart type="given">Trevor</namePart>
<namePart type="family">Cohn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yulan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Automatic evaluation metrics are indispensable for evaluating generated text. To date, these metrics have focused almost exclusively on the content selection aspect of the system output, ignoring the linguistic quality aspect altogether. We bridge this gap by proposing GRUEN for evaluating Grammaticality, non-Redundancy, focUs, structure and coherENce of generated text. GRUEN utilizes a BERT-based model and a class of syntactic, semantic, and contextual features to examine the system output. Unlike most existing evaluation metrics which require human references as an input, GRUEN is reference-less and requires only the system output. Besides, it has the advantage of being unsupervised, deterministic, and adaptable to various tasks. Experiments on seven datasets over four language generation tasks show that the proposed metric correlates highly with human judgments.</abstract>
<identifier type="citekey">zhu-bhat-2020-gruen</identifier>
<identifier type="doi">10.18653/v1/2020.findings-emnlp.9</identifier>
<location>
<url>https://aclanthology.org/2020.findings-emnlp.9</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>94</start>
<end>108</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T GRUEN for Evaluating Linguistic Quality of Generated Text
%A Zhu, Wanzheng
%A Bhat, Suma
%Y Cohn, Trevor
%Y He, Yulan
%Y Liu, Yang
%S Findings of the Association for Computational Linguistics: EMNLP 2020
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F zhu-bhat-2020-gruen
%X Automatic evaluation metrics are indispensable for evaluating generated text. To date, these metrics have focused almost exclusively on the content selection aspect of the system output, ignoring the linguistic quality aspect altogether. We bridge this gap by proposing GRUEN for evaluating Grammaticality, non-Redundancy, focUs, structure and coherENce of generated text. GRUEN utilizes a BERT-based model and a class of syntactic, semantic, and contextual features to examine the system output. Unlike most existing evaluation metrics which require human references as an input, GRUEN is reference-less and requires only the system output. Besides, it has the advantage of being unsupervised, deterministic, and adaptable to various tasks. Experiments on seven datasets over four language generation tasks show that the proposed metric correlates highly with human judgments.
%R 10.18653/v1/2020.findings-emnlp.9
%U https://aclanthology.org/2020.findings-emnlp.9
%U https://doi.org/10.18653/v1/2020.findings-emnlp.9
%P 94-108
Markdown (Informal)
[GRUEN for Evaluating Linguistic Quality of Generated Text](https://aclanthology.org/2020.findings-emnlp.9) (Zhu & Bhat, Findings 2020)
ACL