@inproceedings{ackerman-etal-2024-novel,
title = "A Novel Metric for Measuring the Robustness of Large Language Models in Non-adversarial Scenarios",
author = "Ackerman, Samuel and
Rabinovich, Ella and
Farchi, Eitan and
Anaby Tavor, Ateret",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.158",
pages = "2794--2802",
abstract = "We evaluate the robustness of several large language models on multiple datasets. Robustness here refers to the relative insensitivity of the model{'}s answers to meaning-preserving variants of their input. Benchmark datasets are constructed by introducing naturally-occurring, non-malicious perturbations, or by generating semantically equivalent paraphrases of input questions or statements. We further propose a novel metric for assessing a model robustness, and demonstrate its benefits in the non-adversarial scenario by empirical evaluation of several models on the created datasets.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="ackerman-etal-2024-novel">
    <titleInfo>
      <title>A Novel Metric for Measuring the Robustness of Large Language Models in Non-adversarial Scenarios</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Samuel</namePart>
      <namePart type="family">Ackerman</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ella</namePart>
      <namePart type="family">Rabinovich</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Eitan</namePart>
      <namePart type="family">Farchi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ateret</namePart>
      <namePart type="family">Anaby Tavor</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yaser</namePart>
        <namePart type="family">Al-Onaizan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mohit</namePart>
        <namePart type="family">Bansal</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yun-Nung</namePart>
        <namePart type="family">Chen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Miami, Florida, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We evaluate the robustness of several large language models on multiple datasets. Robustness here refers to the relative insensitivity of the model’s answers to meaning-preserving variants of its input. Benchmark datasets are constructed by introducing naturally-occurring, non-malicious perturbations, or by generating semantically equivalent paraphrases of input questions or statements. We further propose a novel metric for assessing a model’s robustness, and demonstrate its benefits in the non-adversarial scenario by empirical evaluation of several models on the created datasets.</abstract>
<identifier type="citekey">ackerman-etal-2024-novel</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.158</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>2794</start>
<end>2802</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Novel Metric for Measuring the Robustness of Large Language Models in Non-adversarial Scenarios
%A Ackerman, Samuel
%A Rabinovich, Ella
%A Farchi, Eitan
%A Anaby Tavor, Ateret
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F ackerman-etal-2024-novel
%X We evaluate the robustness of several large language models on multiple datasets. Robustness here refers to the relative insensitivity of the model’s answers to meaning-preserving variants of its input. Benchmark datasets are constructed by introducing naturally-occurring, non-malicious perturbations, or by generating semantically equivalent paraphrases of input questions or statements. We further propose a novel metric for assessing a model’s robustness, and demonstrate its benefits in the non-adversarial scenario by empirical evaluation of several models on the created datasets.
%U https://aclanthology.org/2024.findings-emnlp.158
%P 2794-2802
Markdown (Informal)
[A Novel Metric for Measuring the Robustness of Large Language Models in Non-adversarial Scenarios](https://aclanthology.org/2024.findings-emnlp.158) (Ackerman et al., Findings 2024)
ACL
Samuel Ackerman, Ella Rabinovich, Eitan Farchi, and Ateret Anaby Tavor. 2024. A Novel Metric for Measuring the Robustness of Large Language Models in Non-adversarial Scenarios. In Findings of the Association for Computational Linguistics: EMNLP 2024, pages 2794–2802, Miami, Florida, USA. Association for Computational Linguistics.