@inproceedings{karamolegkou-etal-2025-trick,
title = "Trick or Neat: Adversarial Ambiguity and Language Model Evaluation",
author = "Karamolegkou, Antonia and
Eberle, Oliver and
Rust, Phillip and
Kauf, Carina and
S{\o}gaard, Anders",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.954/",
doi = "10.18653/v1/2025.findings-acl.954",
pages = "18542--18561",
ISBN = "979-8-89176-256-5",
abstract = "Detecting ambiguity is important for language understanding, including uncertainty estimation, humour detection, and processing garden path sentences. We assess language models' sensitivity to ambiguity by introducing an adversarial ambiguity dataset that includes syntactic, lexical, and phonological ambiguities along with adversarial variations (e.g., word-order changes, synonym replacements, and random-based alterations). Our findings show that direct prompting fails to robustly identify ambiguity, while linear probes trained on model representations can decode ambiguity with high accuracy, sometimes exceeding 90{\%}. Our results offer insights into the prompting paradigm and how language models encode ambiguity at different layers."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="karamolegkou-etal-2025-trick">
<titleInfo>
<title>Trick or Neat: Adversarial Ambiguity and Language Model Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Antonia</namePart>
<namePart type="family">Karamolegkou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oliver</namePart>
<namePart type="family">Eberle</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Phillip</namePart>
<namePart type="family">Rust</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carina</namePart>
<namePart type="family">Kauf</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anders</namePart>
<namePart type="family">Søgaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-256-5</identifier>
</relatedItem>
<abstract>Detecting ambiguity is important for language understanding, including uncertainty estimation, humour detection, and processing garden path sentences. We assess language models’ sensitivity to ambiguity by introducing an adversarial ambiguity dataset that includes syntactic, lexical, and phonological ambiguities along with adversarial variations (e.g., word-order changes, synonym replacements, and random-based alterations). Our findings show that direct prompting fails to robustly identify ambiguity, while linear probes trained on model representations can decode ambiguity with high accuracy, sometimes exceeding 90%. Our results offer insights into the prompting paradigm and how language models encode ambiguity at different layers.</abstract>
<identifier type="citekey">karamolegkou-etal-2025-trick</identifier>
<identifier type="doi">10.18653/v1/2025.findings-acl.954</identifier>
<location>
<url>https://aclanthology.org/2025.findings-acl.954/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>18542</start>
<end>18561</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Trick or Neat: Adversarial Ambiguity and Language Model Evaluation
%A Karamolegkou, Antonia
%A Eberle, Oliver
%A Rust, Phillip
%A Kauf, Carina
%A Søgaard, Anders
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F karamolegkou-etal-2025-trick
%X Detecting ambiguity is important for language understanding, including uncertainty estimation, humour detection, and processing garden path sentences. We assess language models’ sensitivity to ambiguity by introducing an adversarial ambiguity dataset that includes syntactic, lexical, and phonological ambiguities along with adversarial variations (e.g., word-order changes, synonym replacements, and random-based alterations). Our findings show that direct prompting fails to robustly identify ambiguity, while linear probes trained on model representations can decode ambiguity with high accuracy, sometimes exceeding 90%. Our results offer insights into the prompting paradigm and how language models encode ambiguity at different layers.
%R 10.18653/v1/2025.findings-acl.954
%U https://aclanthology.org/2025.findings-acl.954/
%U https://doi.org/10.18653/v1/2025.findings-acl.954
%P 18542-18561
Markdown (Informal)
[Trick or Neat: Adversarial Ambiguity and Language Model Evaluation](https://aclanthology.org/2025.findings-acl.954/) (Karamolegkou et al., Findings 2025)
ACL
Antonia Karamolegkou, Oliver Eberle, Phillip Rust, Carina Kauf, and Anders Søgaard. 2025. Trick or Neat: Adversarial Ambiguity and Language Model Evaluation. In Findings of the Association for Computational Linguistics: ACL 2025, pages 18542–18561, Vienna, Austria. Association for Computational Linguistics.