@inproceedings{bystronski-etal-2026-factual,
title = "Factual State Discovery Benchmark: Evaluating Fact Elicitation in {P}olish Tax Law",
author = "Bystro{\'n}ski, Mateusz and
Tagowski, Kamil and
Janiak, Denis and
Farganus, Julia and
Augustyniak, Lukasz and
Kajdanowicz, Monika and
Kajdanowicz, Tomasz Jan",
editor = "T.Y.S.S., Santosh and
Rodriguez, Juan Diego and
de Gibert, Ona",
booktitle = "Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics ({ACL} 2026)",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.acl-srw.114/",
pages = "1262--1279",
ISBN = "979-8-89176-393-7",
abstract = "Before a tax authority can issue a ruling, it must receive a complete description of the taxpayer{'}s situation{---}yet no benchmark measures whether language models can systematically elicit all relevant facts through dialogue. We introduce \textbf{FSDBench} (\textbf{Factual State Discovery Benchmark}), in which a discovery agent questions a simulated taxpayer grounded in a real tax document. The dataset comprises 500 narratives from official Polish tax interpretations, decomposed into 32 874 atomic facts with validated supported precision (97.6{\%}), atomicity (93.8{\%}), and sentence coverage (96.0{\%}). Experiments with four models show that even the best system recovers only 77{\%} of facts on easy samples and under 49{\%} on hard samples after 50 turns. These findings establish conversational fact elicitation as a challenging open problem requiring retrieval-augmented and adaptive questioning strategies."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bystronski-etal-2026-factual">
<titleInfo>
<title>Factual State Discovery Benchmark: Evaluating Fact Elicitation in Polish Tax Law</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mateusz</namePart>
<namePart type="family">Bystroński</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kamil</namePart>
<namePart type="family">Tagowski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Denis</namePart>
<namePart type="family">Janiak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Farganus</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lukasz</namePart>
<namePart type="family">Augustyniak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Monika</namePart>
<namePart type="family">Kajdanowicz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tomasz</namePart>
<namePart type="given">Jan</namePart>
<namePart type="family">Kajdanowicz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Santosh</namePart>
<namePart type="family">T.Y.S.S.</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="given">Diego</namePart>
<namePart type="family">Rodriguez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ona</namePart>
<namePart type="family">de Gibert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-393-7</identifier>
</relatedItem>
<abstract>Before a tax authority can issue a ruling, it must receive a complete description of the taxpayer’s situation—yet no benchmark measures whether language models can systematically elicit all relevant facts through dialogue. We introduce FSDBench (Factual State Discovery Benchmark), in which a discovery agent questions a simulated taxpayer grounded in a real tax document. The dataset comprises 500 narratives from official Polish tax interpretations, decomposed into 32 874 atomic facts with validated supported precision (97.6%), atomicity (93.8%), and sentence coverage (96.0%). Experiments with four models show that even the best system recovers only 77% of facts on easy samples and under 49% on hard samples after 50 turns. These findings establish conversational fact elicitation as a challenging open problem requiring retrieval-augmented and adaptive questioning strategies.</abstract>
<identifier type="citekey">bystronski-etal-2026-factual</identifier>
<location>
<url>https://aclanthology.org/2026.acl-srw.114/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>1262</start>
<end>1279</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Factual State Discovery Benchmark: Evaluating Fact Elicitation in Polish Tax Law
%A Bystroński, Mateusz
%A Tagowski, Kamil
%A Janiak, Denis
%A Farganus, Julia
%A Augustyniak, Lukasz
%A Kajdanowicz, Monika
%A Kajdanowicz, Tomasz Jan
%Y T.Y.S.S., Santosh
%Y Rodriguez, Juan Diego
%Y de Gibert, Ona
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-393-7
%F bystronski-etal-2026-factual
%X Before a tax authority can issue a ruling, it must receive a complete description of the taxpayer’s situation—yet no benchmark measures whether language models can systematically elicit all relevant facts through dialogue. We introduce FSDBench (Factual State Discovery Benchmark), in which a discovery agent questions a simulated taxpayer grounded in a real tax document. The dataset comprises 500 narratives from official Polish tax interpretations, decomposed into 32 874 atomic facts with validated supported precision (97.6%), atomicity (93.8%), and sentence coverage (96.0%). Experiments with four models show that even the best system recovers only 77% of facts on easy samples and under 49% on hard samples after 50 turns. These findings establish conversational fact elicitation as a challenging open problem requiring retrieval-augmented and adaptive questioning strategies.
%U https://aclanthology.org/2026.acl-srw.114/
%P 1262-1279
Markdown (Informal)
[Factual State Discovery Benchmark: Evaluating Fact Elicitation in Polish Tax Law](https://aclanthology.org/2026.acl-srw.114/) (Bystroński et al., ACL 2026)
ACL
- Mateusz Bystroński, Kamil Tagowski, Denis Janiak, Julia Farganus, Lukasz Augustyniak, Monika Kajdanowicz, and Tomasz Jan Kajdanowicz. 2026. Factual State Discovery Benchmark: Evaluating Fact Elicitation in Polish Tax Law. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026), pages 1262–1279, San Diego, California, United States. Association for Computational Linguistics.