@inproceedings{jieyu-etal-2024-sentence,
title = "Sentence-Space Metrics ({SSM}) for the Evaluation of Sentence Comprehension",
author = "Jieyu, Lin and
Honghua, Chen and
Nai, Ding",
editor = "Sun, Maosong and
Liang, Jiye and
Han, Xianpei and
Liu, Zhiyuan and
He, Yulan",
booktitle = "Proceedings of the 23rd Chinese National Conference on Computational Linguistics (Volume 1: Main Conference)",
month = jul,
year = "2024",
address = "Taiyuan, China",
publisher = "Chinese Information Processing Society of China",
url = "https://aclanthology.org/2024.ccl-1.103/",
pages = "1334--1350",
language = "eng",
abstract = "{\textquotedblleft}It is a fundamental challenge to evaluate whether a model can truly capture the meaning of sentences. Evaluation of whether a model well captures the meaning of individual words, however, can be effectively achieved by analyzing whether the model encodes words in a vector space where semantically similar words form clusters. Inspired by this approach, we propose the Sentence-Space Metrics (SSM) to evaluate model interpretation of sentences, and the sentence space is constructed based on the pairwise entailment relationships between all sentence pairs within a sentence pool. We use three metrics to evaluate a sentence space, i.e., (1) sparsity, (2) clustering of related sentences, and (3) similarity with the sentence space measured from humans. The SSM is applied to evaluate 20 models, including ChatGPT, 18 BERT-family models fine-tuned for Natural Language Inference (NLI) task, as well as SimCSE, a sentence representation model. The SSM reveals dramatic differences among models: Although all models achieve high accuracy on standard NLI datasets such as MNLI, none of them mirrors the human behavior under the SSM. These results demonstrate that, compared with traditional accuracy measures, the SSM considers pairwise relationships between hundreds of sentences and therefore provide a more fine-grained evaluation of model interpretation of sentences.{\textquotedblright}"
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jieyu-etal-2024-sentence">
<titleInfo>
<title>Sentence-Space Metrics (SSM) for the Evaluation of Sentence Comprehension</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lin</namePart>
<namePart type="family">Jieyu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chen</namePart>
<namePart type="family">Honghua</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ding</namePart>
<namePart type="family">Nai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 23rd Chinese National Conference on Computational Linguistics (Volume 1: Main Conference)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maosong</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiye</namePart>
<namePart type="family">Liang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xianpei</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhiyuan</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yulan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Chinese Information Processing Society of China</publisher>
<place>
<placeTerm type="text">Taiyuan, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>“It is a fundamental challenge to evaluate whether a model can truly capture the meaning of sentences. Evaluation of whether a model well captures the meaning of individual words, however, can be effectively achieved by analyzing whether the model encodes words in a vector space where semantically similar words form clusters. Inspired by this approach, we propose the Sentence-Space Metrics (SSM) to evaluate model interpretation of sentences, and the sentence space is constructed based on the pairwise entailment relationships between all sentence pairs within a sentence pool. We use three metrics to evaluate a sentence space, i.e., (1) sparsity, (2) clustering of related sentences, and (3) similarity with the sentence space measured from humans. The SSM is applied to evaluate 20 models, including ChatGPT, 18 BERT-family models fine-tuned for Natural Language Inference (NLI) task, as well as SimCSE, a sentence representation model. The SSM reveals dramatic differences among models: Although all models achieve high accuracy on standard NLI datasets such as MNLI, none of them mirrors the human behavior under the SSM. These results demonstrate that, compared with traditional accuracy measures, the SSM considers pairwise relationships between hundreds of sentences and therefore provide a more fine-grained evaluation of model interpretation of sentences.”</abstract>
<identifier type="citekey">jieyu-etal-2024-sentence</identifier>
<location>
<url>https://aclanthology.org/2024.ccl-1.103/</url>
</location>
<part>
<date>2024-07</date>
<extent unit="page">
<start>1334</start>
<end>1350</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Sentence-Space Metrics (SSM) for the Evaluation of Sentence Comprehension
%A Jieyu, Lin
%A Honghua, Chen
%A Nai, Ding
%Y Sun, Maosong
%Y Liang, Jiye
%Y Han, Xianpei
%Y Liu, Zhiyuan
%Y He, Yulan
%S Proceedings of the 23rd Chinese National Conference on Computational Linguistics (Volume 1: Main Conference)
%D 2024
%8 July
%I Chinese Information Processing Society of China
%C Taiyuan, China
%G eng
%F jieyu-etal-2024-sentence
%X “It is a fundamental challenge to evaluate whether a model can truly capture the meaning of sentences. Evaluation of whether a model well captures the meaning of individual words, however, can be effectively achieved by analyzing whether the model encodes words in a vector space where semantically similar words form clusters. Inspired by this approach, we propose the Sentence-Space Metrics (SSM) to evaluate model interpretation of sentences, and the sentence space is constructed based on the pairwise entailment relationships between all sentence pairs within a sentence pool. We use three metrics to evaluate a sentence space, i.e., (1) sparsity, (2) clustering of related sentences, and (3) similarity with the sentence space measured from humans. The SSM is applied to evaluate 20 models, including ChatGPT, 18 BERT-family models fine-tuned for Natural Language Inference (NLI) task, as well as SimCSE, a sentence representation model. The SSM reveals dramatic differences among models: Although all models achieve high accuracy on standard NLI datasets such as MNLI, none of them mirrors the human behavior under the SSM. These results demonstrate that, compared with traditional accuracy measures, the SSM considers pairwise relationships between hundreds of sentences and therefore provide a more fine-grained evaluation of model interpretation of sentences.”
%U https://aclanthology.org/2024.ccl-1.103/
%P 1334-1350
Markdown (Informal)
[Sentence-Space Metrics (SSM) for the Evaluation of Sentence Comprehension](https://aclanthology.org/2024.ccl-1.103/) (Jieyu et al., CCL 2024)
ACL