BibTeX
@inproceedings{vasilyev-bohannon-2021-estime,
    title = "{ESTIME}: Estimation of Summary-to-Text Inconsistency by Mismatched Embeddings",
    author = "Vasilyev, Oleg  and
      Bohannon, John",
    editor = "Gao, Yang  and
      Eger, Steffen  and
      Zhao, Wei  and
      Lertvittayakumjorn, Piyawat  and
      Fomicheva, Marina",
    booktitle = "Proceedings of the 2nd Workshop on Evaluation and Comparison of NLP Systems",
    month = nov,
    year = "2021",
    address = "Punta Cana, Dominican Republic",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.eval4nlp-1.10",
    doi = "10.18653/v1/2021.eval4nlp-1.10",
    pages = "94--103",
abstract = "We propose a new reference-free summary quality evaluation measure, with emphasis on the faithfulness. The measure is based on finding and counting all probable potential inconsistencies of the summary with respect to the source document. The proposed ESTIME, Estimator of Summary-to-Text Inconsistency by Mismatched Embeddings, correlates with expert scores in summary-level SummEval dataset stronger than other common evaluation measures not only in Consistency but also in Fluency. We also introduce a method of generating subtle factual errors in human summaries. We show that ESTIME is more sensitive to subtle errors than other common evaluation measures.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="vasilyev-bohannon-2021-estime">
<titleInfo>
<title>ESTIME: Estimation of Summary-to-Text Inconsistency by Mismatched Embeddings</title>
</titleInfo>
<name type="personal">
<namePart type="given">Oleg</namePart>
<namePart type="family">Vasilyev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="family">Bohannon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on Evaluation and Comparison of NLP Systems</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steffen</namePart>
<namePart type="family">Eger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Piyawat</namePart>
<namePart type="family">Lertvittayakumjorn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marina</namePart>
<namePart type="family">Fomicheva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Punta Cana, Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
    <abstract>We propose a new reference-free summary quality evaluation measure, with an emphasis on faithfulness. The measure is based on finding and counting all potential inconsistencies of the summary with respect to the source document. The proposed ESTIME, Estimator of Summary-to-Text Inconsistency by Mismatched Embeddings, correlates more strongly with expert scores on the summary-level SummEval dataset than other common evaluation measures, not only in Consistency but also in Fluency. We also introduce a method for generating subtle factual errors in human summaries, and show that ESTIME is more sensitive to such errors than other common evaluation measures.</abstract>
<identifier type="citekey">vasilyev-bohannon-2021-estime</identifier>
<identifier type="doi">10.18653/v1/2021.eval4nlp-1.10</identifier>
<location>
<url>https://aclanthology.org/2021.eval4nlp-1.10</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>94</start>
<end>103</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T ESTIME: Estimation of Summary-to-Text Inconsistency by Mismatched Embeddings
%A Vasilyev, Oleg
%A Bohannon, John
%Y Gao, Yang
%Y Eger, Steffen
%Y Zhao, Wei
%Y Lertvittayakumjorn, Piyawat
%Y Fomicheva, Marina
%S Proceedings of the 2nd Workshop on Evaluation and Comparison of NLP Systems
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic
%F vasilyev-bohannon-2021-estime
%X We propose a new reference-free summary quality evaluation measure, with an emphasis on faithfulness. The measure is based on finding and counting all potential inconsistencies of the summary with respect to the source document. The proposed ESTIME, Estimator of Summary-to-Text Inconsistency by Mismatched Embeddings, correlates more strongly with expert scores on the summary-level SummEval dataset than other common evaluation measures, not only in Consistency but also in Fluency. We also introduce a method for generating subtle factual errors in human summaries, and show that ESTIME is more sensitive to such errors than other common evaluation measures.
%R 10.18653/v1/2021.eval4nlp-1.10
%U https://aclanthology.org/2021.eval4nlp-1.10
%U https://doi.org/10.18653/v1/2021.eval4nlp-1.10
%P 94-103
Markdown (Informal)
[ESTIME: Estimation of Summary-to-Text Inconsistency by Mismatched Embeddings](https://aclanthology.org/2021.eval4nlp-1.10) (Vasilyev & Bohannon, Eval4NLP 2021)
ACL
Oleg Vasilyev and John Bohannon. 2021. ESTIME: Estimation of Summary-to-Text Inconsistency by Mismatched Embeddings. In Proceedings of the 2nd Workshop on Evaluation and Comparison of NLP Systems, pages 94–103, Punta Cana, Dominican Republic. Association for Computational Linguistics.
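The abstract describes ESTIME as counting potential inconsistencies of a summary against its source via mismatched contextual embeddings. As a rough, hedged illustration of that idea only (not the authors' implementation; the model choice, hidden layer, similarity metric, and tokenization handling below are all assumptions), the following Python sketch pairs each summary token with its most similar document-token embedding from a BERT hidden layer and counts pairs whose surface tokens differ:

```python
# Rough illustration of the mismatched-embeddings idea from the abstract.
# NOTE: this is NOT the authors' ESTIME implementation. Model, hidden layer,
# similarity metric, and tokenization handling are assumptions made here.
import torch
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased", output_hidden_states=True)
model.eval()

def token_embeddings(text: str, layer: int = -1):
    """Contextual embeddings for each token of `text`, special tokens removed."""
    enc = tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():
        hidden = model(**enc).hidden_states[layer][0]   # (seq_len, dim)
    ids = enc["input_ids"][0]
    keep = ~torch.isin(ids, torch.tensor(tokenizer.all_special_ids))
    return ids[keep], hidden[keep]

def inconsistency_count(summary: str, document: str, layer: int = -1) -> int:
    """Count summary tokens whose most similar document-token embedding
    belongs to a different surface token (a proxy for 'potential
    inconsistencies' in the abstract's sense)."""
    s_ids, s_emb = token_embeddings(summary, layer)
    d_ids, d_emb = token_embeddings(document, layer)
    s_emb = torch.nn.functional.normalize(s_emb, dim=-1)
    d_emb = torch.nn.functional.normalize(d_emb, dim=-1)
    nearest = (s_emb @ d_emb.T).argmax(dim=-1)   # best document match per summary token
    return int((s_ids != d_ids[nearest]).sum())  # mismatched tokens = suspected errors

# Example: a faithful summary should yield a lower count than a distorted one.
doc = "The company reported a loss of three million dollars in 2020."
print(inconsistency_count("The company lost three million dollars.", doc))
print(inconsistency_count("The company gained three million dollars.", doc))
```

A higher count would suggest a less faithful summary. For the actual procedure, including which hidden layer is used and how summary tokens are contextualized, consult the paper itself.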