BibTeX
@inproceedings{manning-schneider-2021-referenceless,
    title = "Referenceless Parsing-Based Evaluation of {AMR}-to-{E}nglish Generation",
    author = "Manning, Emma  and
      Schneider, Nathan",
    editor = "Gao, Yang  and
      Eger, Steffen  and
      Zhao, Wei  and
      Lertvittayakumjorn, Piyawat  and
      Fomicheva, Marina",
    booktitle = "Proceedings of the 2nd Workshop on Evaluation and Comparison of NLP Systems",
    month = nov,
    year = "2021",
    address = "Punta Cana, Dominican Republic",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.eval4nlp-1.12/",
    doi = "10.18653/v1/2021.eval4nlp-1.12",
    pages = "114--122",
    abstract = "Reference-based automatic evaluation metrics are notoriously limited for NLG due to their inability to fully capture the range of possible outputs. We examine a referenceless alternative: evaluating the adequacy of English sentences generated from Abstract Meaning Representation (AMR) graphs by parsing into AMR and comparing the parse directly to the input. We find that the errors introduced by automatic AMR parsing substantially limit the effectiveness of this approach, but a manual editing study indicates that as parsing improves, parsing-based evaluation has the potential to outperform most reference-based metrics."
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="manning-schneider-2021-referenceless">
    <titleInfo>
      <title>Referenceless Parsing-Based Evaluation of AMR-to-English Generation</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Emma</namePart>
      <namePart type="family">Manning</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nathan</namePart>
      <namePart type="family">Schneider</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2nd Workshop on Evaluation and Comparison of NLP Systems</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yang</namePart>
        <namePart type="family">Gao</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Steffen</namePart>
        <namePart type="family">Eger</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Wei</namePart>
        <namePart type="family">Zhao</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Piyawat</namePart>
        <namePart type="family">Lertvittayakumjorn</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marina</namePart>
        <namePart type="family">Fomicheva</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Punta Cana, Dominican Republic</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Reference-based automatic evaluation metrics are notoriously limited for NLG due to their inability to fully capture the range of possible outputs. We examine a referenceless alternative: evaluating the adequacy of English sentences generated from Abstract Meaning Representation (AMR) graphs by parsing into AMR and comparing the parse directly to the input. We find that the errors introduced by automatic AMR parsing substantially limit the effectiveness of this approach, but a manual editing study indicates that as parsing improves, parsing-based evaluation has the potential to outperform most reference-based metrics.</abstract>
    <identifier type="citekey">manning-schneider-2021-referenceless</identifier>
    <identifier type="doi">10.18653/v1/2021.eval4nlp-1.12</identifier>
    <location>
      <url>https://aclanthology.org/2021.eval4nlp-1.12/</url>
    </location>
    <part>
      <date>2021-11</date>
      <extent unit="page">
        <start>114</start>
        <end>122</end>
      </extent>
    </part>
  </mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Referenceless Parsing-Based Evaluation of AMR-to-English Generation
%A Manning, Emma
%A Schneider, Nathan
%Y Gao, Yang
%Y Eger, Steffen
%Y Zhao, Wei
%Y Lertvittayakumjorn, Piyawat
%Y Fomicheva, Marina
%S Proceedings of the 2nd Workshop on Evaluation and Comparison of NLP Systems
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic
%F manning-schneider-2021-referenceless
%X Reference-based automatic evaluation metrics are notoriously limited for NLG due to their inability to fully capture the range of possible outputs. We examine a referenceless alternative: evaluating the adequacy of English sentences generated from Abstract Meaning Representation (AMR) graphs by parsing into AMR and comparing the parse directly to the input. We find that the errors introduced by automatic AMR parsing substantially limit the effectiveness of this approach, but a manual editing study indicates that as parsing improves, parsing-based evaluation has the potential to outperform most reference-based metrics.
%R 10.18653/v1/2021.eval4nlp-1.12
%U https://aclanthology.org/2021.eval4nlp-1.12/
%U https://doi.org/10.18653/v1/2021.eval4nlp-1.12
%P 114-122
Markdown (Informal)
[Referenceless Parsing-Based Evaluation of AMR-to-English Generation](https://aclanthology.org/2021.eval4nlp-1.12/) (Manning & Schneider, Eval4NLP 2021)
ACL
Emma Manning and Nathan Schneider. 2021. [Referenceless Parsing-Based Evaluation of AMR-to-English Generation](https://aclanthology.org/2021.eval4nlp-1.12/). In *Proceedings of the 2nd Workshop on Evaluation and Comparison of NLP Systems*, pages 114–122, Punta Cana, Dominican Republic. Association for Computational Linguistics.