@inproceedings{kour-etal-2022-measuring,
title = "Measuring the Measuring Tools: An Automatic Evaluation of Semantic Metrics for Text Corpora",
author = "Kour, George and
Ackerman, Samuel and
Farchi, Eitan Daniel and
Raz, Orna and
Carmeli, Boaz and
Tavor, Ateret Anaby",
editor = "Bosselut, Antoine and
Chandu, Khyathi and
Dhole, Kaustubh and
Gangal, Varun and
Gehrmann, Sebastian and
Jernite, Yacine and
Novikova, Jekaterina and
Perez-Beltrachini, Laura",
booktitle = "Proceedings of the 2nd Workshop on Natural Language Generation, Evaluation, and Metrics (GEM)",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.gem-1.35",
doi = "10.18653/v1/2022.gem-1.35",
pages = "405--416",
abstract = "Similarity metrics for text corpora are becoming critical due to the tremendous growth in the number of generative models. These similarity metrics measure the semantic gap between human and machine-generated text on the corpus level. However, standard methods for evaluating the characteristics of these metrics have yet to be established. We propose a set of automatic measures for evaluating the characteristics of semantic similarity metrics for text corpora. Our measures allow us to sensibly compare and identify the strengths and weaknesses of these metrics. We demonstrate the effectiveness of our evaluation measures in capturing fundamental characteristics by comparing it to a collection of classical and state-of-the-art metrics. Our measures revealed that recent metrics are becoming better in identifying semantic distributional mismatch while classical metrics are more sensitive to perturbations in the surface text levels.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kour-etal-2022-measuring">
<titleInfo>
<title>Measuring the Measuring Tools: An Automatic Evaluation of Semantic Metrics for Text Corpora</title>
</titleInfo>
<name type="personal">
<namePart type="given">George</namePart>
<namePart type="family">Kour</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samuel</namePart>
<namePart type="family">Ackerman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eitan</namePart>
<namePart type="given">Daniel</namePart>
<namePart type="family">Farchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Orna</namePart>
<namePart type="family">Raz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Boaz</namePart>
<namePart type="family">Carmeli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ateret</namePart>
<namePart type="given">Anaby</namePart>
<namePart type="family">Tavor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on Natural Language Generation, Evaluation, and Metrics (GEM)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Antoine</namePart>
<namePart type="family">Bosselut</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khyathi</namePart>
<namePart type="family">Chandu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kaustubh</namePart>
<namePart type="family">Dhole</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Varun</namePart>
<namePart type="family">Gangal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Gehrmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yacine</namePart>
<namePart type="family">Jernite</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jekaterina</namePart>
<namePart type="family">Novikova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laura</namePart>
<namePart type="family">Perez-Beltrachini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Similarity metrics for text corpora are becoming critical due to the tremendous growth in the number of generative models. These similarity metrics measure the semantic gap between human and machine-generated text on the corpus level. However, standard methods for evaluating the characteristics of these metrics have yet to be established. We propose a set of automatic measures for evaluating the characteristics of semantic similarity metrics for text corpora. Our measures allow us to sensibly compare and identify the strengths and weaknesses of these metrics. We demonstrate the effectiveness of our evaluation measures in capturing fundamental characteristics by comparing it to a collection of classical and state-of-the-art metrics. Our measures revealed that recent metrics are becoming better in identifying semantic distributional mismatch while classical metrics are more sensitive to perturbations in the surface text levels.</abstract>
<identifier type="citekey">kour-etal-2022-measuring</identifier>
<identifier type="doi">10.18653/v1/2022.gem-1.35</identifier>
<location>
<url>https://aclanthology.org/2022.gem-1.35</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>405</start>
<end>416</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Measuring the Measuring Tools: An Automatic Evaluation of Semantic Metrics for Text Corpora
%A Kour, George
%A Ackerman, Samuel
%A Farchi, Eitan Daniel
%A Raz, Orna
%A Carmeli, Boaz
%A Tavor, Ateret Anaby
%Y Bosselut, Antoine
%Y Chandu, Khyathi
%Y Dhole, Kaustubh
%Y Gangal, Varun
%Y Gehrmann, Sebastian
%Y Jernite, Yacine
%Y Novikova, Jekaterina
%Y Perez-Beltrachini, Laura
%S Proceedings of the 2nd Workshop on Natural Language Generation, Evaluation, and Metrics (GEM)
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates (Hybrid)
%F kour-etal-2022-measuring
%X Similarity metrics for text corpora are becoming critical due to the tremendous growth in the number of generative models. These similarity metrics measure the semantic gap between human and machine-generated text on the corpus level. However, standard methods for evaluating the characteristics of these metrics have yet to be established. We propose a set of automatic measures for evaluating the characteristics of semantic similarity metrics for text corpora. Our measures allow us to sensibly compare and identify the strengths and weaknesses of these metrics. We demonstrate the effectiveness of our evaluation measures in capturing fundamental characteristics by comparing it to a collection of classical and state-of-the-art metrics. Our measures revealed that recent metrics are becoming better in identifying semantic distributional mismatch while classical metrics are more sensitive to perturbations in the surface text levels.
%R 10.18653/v1/2022.gem-1.35
%U https://aclanthology.org/2022.gem-1.35
%U https://doi.org/10.18653/v1/2022.gem-1.35
%P 405-416
Markdown (Informal)
[Measuring the Measuring Tools: An Automatic Evaluation of Semantic Metrics for Text Corpora](https://aclanthology.org/2022.gem-1.35) (Kour et al., GEM 2022)
ACL
- George Kour, Samuel Ackerman, Eitan Daniel Farchi, Orna Raz, Boaz Carmeli, and Ateret Anaby Tavor. 2022. Measuring the Measuring Tools: An Automatic Evaluation of Semantic Metrics for Text Corpora. In Proceedings of the 2nd Workshop on Natural Language Generation, Evaluation, and Metrics (GEM), pages 405–416, Abu Dhabi, United Arab Emirates (Hybrid). Association for Computational Linguistics.