@inproceedings{yoon-etal-2026-autoanoeval,
title = "{A}uto{A}no{E}val: Semantic-Aware Model Selection via Tree-Guided {LLM} Reasoning for Tabular Anomaly Detection",
author = "Yoon, Suhee and
Yoon, Sanghyu and
Sim, Ye Seul and
Yoa, Seungdong and
Kim, Dongmin and
Lee, Soonyoung and
Lee, Hankook and
Lim, Woohyung",
editor = "Demberg, Vera and
Inui, Kentaro and
Marquez, Llu{\'i}s",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {EACL} 2026",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-eacl.183/",
pages = "3546--3560",
ISBN = "979-8-89176-386-9",
abstract = "In the tabular domain, which is the predominant data format in real-world applications, anomalies are extremely rare or difficult to collect, as their identification often requires domain expertise. Consequently, evaluating tabular anomaly detection models is challenging, since anomalies may be absent even in evaluation sets. To tackle this challenge, prior works have generated synthetic anomaly generation rely on statistical patterns, they often overlook domain semantics and struggle to reflect the complex, domain-specific nature of real-world anomalies. We propose AutoAnoEval, a novel evaluation framework for tabular AD that constructs pseudo-evaluation sets with semantically grounded synthetic anomalies. Our approach leverages an iterative interaction between a Large Language Model (LLM) and a decision tree (DT): the LLM generates realistic anomaly conditions based on contextual semantics, while the DT provides structural guidance by capturing feature interactions inherent in the tabular data. This iterative loop ensures the generation of diverse anomaly conditions, ranging from easily detectable outliers to subtle cases near the decision boundary. Extensive experiments on 20 tabular AD benchmarks demonstrate that AutoAnoEval achieves superior model selection performance, with high ranking alignment and minimal performance gaps compared to evaluations on anomalies encountered in practical applications."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yoon-etal-2026-autoanoeval">
<titleInfo>
<title>AutoAnoEval: Semantic-Aware Model Selection via Tree-Guided LLM Reasoning for Tabular Anomaly Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Suhee</namePart>
<namePart type="family">Yoon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sanghyu</namePart>
<namePart type="family">Yoon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ye</namePart>
<namePart type="given">Seul</namePart>
<namePart type="family">Sim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seungdong</namePart>
<namePart type="family">Yoa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dongmin</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Soonyoung</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hankook</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Woohyung</namePart>
<namePart type="family">Lim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Demberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lluís</namePart>
<namePart type="family">Marquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-386-9</identifier>
</relatedItem>
<abstract>In the tabular domain, which is the predominant data format in real-world applications, anomalies are extremely rare or difficult to collect, as their identification often requires domain expertise. Consequently, evaluating tabular anomaly detection models is challenging, since anomalies may be absent even in evaluation sets. To tackle this challenge, prior works have generated synthetic anomaly generation rely on statistical patterns, they often overlook domain semantics and struggle to reflect the complex, domain-specific nature of real-world anomalies. We propose AutoAnoEval, a novel evaluation framework for tabular AD that constructs pseudo-evaluation sets with semantically grounded synthetic anomalies. Our approach leverages an iterative interaction between a Large Language Model (LLM) and a decision tree (DT): the LLM generates realistic anomaly conditions based on contextual semantics, while the DT provides structural guidance by capturing feature interactions inherent in the tabular data. This iterative loop ensures the generation of diverse anomaly conditions, ranging from easily detectable outliers to subtle cases near the decision boundary. Extensive experiments on 20 tabular AD benchmarks demonstrate that AutoAnoEval achieves superior model selection performance, with high ranking alignment and minimal performance gaps compared to evaluations on anomalies encountered in practical applications.</abstract>
<identifier type="citekey">yoon-etal-2026-autoanoeval</identifier>
<location>
<url>https://aclanthology.org/2026.findings-eacl.183/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>3546</start>
<end>3560</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T AutoAnoEval: Semantic-Aware Model Selection via Tree-Guided LLM Reasoning for Tabular Anomaly Detection
%A Yoon, Suhee
%A Yoon, Sanghyu
%A Sim, Ye Seul
%A Yoa, Seungdong
%A Kim, Dongmin
%A Lee, Soonyoung
%A Lee, Hankook
%A Lim, Woohyung
%Y Demberg, Vera
%Y Inui, Kentaro
%Y Marquez, Lluís
%S Findings of the Association for Computational Linguistics: EACL 2026
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-386-9
%F yoon-etal-2026-autoanoeval
%X In the tabular domain, which is the predominant data format in real-world applications, anomalies are extremely rare or difficult to collect, as their identification often requires domain expertise. Consequently, evaluating tabular anomaly detection models is challenging, since anomalies may be absent even in evaluation sets. To tackle this challenge, prior works have generated synthetic anomaly generation rely on statistical patterns, they often overlook domain semantics and struggle to reflect the complex, domain-specific nature of real-world anomalies. We propose AutoAnoEval, a novel evaluation framework for tabular AD that constructs pseudo-evaluation sets with semantically grounded synthetic anomalies. Our approach leverages an iterative interaction between a Large Language Model (LLM) and a decision tree (DT): the LLM generates realistic anomaly conditions based on contextual semantics, while the DT provides structural guidance by capturing feature interactions inherent in the tabular data. This iterative loop ensures the generation of diverse anomaly conditions, ranging from easily detectable outliers to subtle cases near the decision boundary. Extensive experiments on 20 tabular AD benchmarks demonstrate that AutoAnoEval achieves superior model selection performance, with high ranking alignment and minimal performance gaps compared to evaluations on anomalies encountered in practical applications.
%U https://aclanthology.org/2026.findings-eacl.183/
%P 3546-3560
Markdown (Informal)
[AutoAnoEval: Semantic-Aware Model Selection via Tree-Guided LLM Reasoning for Tabular Anomaly Detection](https://aclanthology.org/2026.findings-eacl.183/) (Yoon et al., Findings 2026)
ACL
- Suhee Yoon, Sanghyu Yoon, Ye Seul Sim, Seungdong Yoa, Dongmin Kim, Soonyoung Lee, Hankook Lee, and Woohyung Lim. 2026. AutoAnoEval: Semantic-Aware Model Selection via Tree-Guided LLM Reasoning for Tabular Anomaly Detection. In Findings of the Association for Computational Linguistics: EACL 2026, pages 3546–3560, Rabat, Morocco. Association for Computational Linguistics.