@inproceedings{antropova-etal-2025-tabaqa,
title = "{T}aba{QA} at {S}em{E}val-2025 Task 8: Column Augmented Generation for Question Answering over Tabular Data",
author = "Antropova, Ekaterina and
Kratkov, Egor and
Derunets, Roman and
Trofimova, Margarita and
Bondarenko, Ivan and
Panchenko, Alexander and
Konovalov, Vasily and
Savkin, Maksim",
editor = "Rosenthal, Sara and
Ros{\'a}, Aiala and
Ghosh, Debanjan and
Zampieri, Marcos",
booktitle = "Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.semeval-1.126/",
pages = "937--952",
ISBN = "979-8-89176-273-2",
abstract = "The DataBench shared task in the SemEval-2025 competition aims to tackle the problem of QA from data in tables. Given the diversity of the structure of tables, there are different approaches to retrieving the answer. Although Retrieval-Augmented Generation (RAG) is a viable solution, extracting relevant information from tables remains challenging. In addition, the table can be prohibitively large for direct integration into the LLM context. In this paper, we address QA over tabular data first by identifying relevant columns that might contain the answers, then the LLM generates answers by providing the context of the relevant columns, and finally, the LLM refines its answers. This approach secured us 7th place in the DataBench lite category."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="antropova-etal-2025-tabaqa">
<titleInfo>
<title>TabaQA at SemEval-2025 Task 8: Column Augmented Generation for Question Answering over Tabular Data</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Antropova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Egor</namePart>
<namePart type="family">Kratkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roman</namePart>
<namePart type="family">Derunets</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Margarita</namePart>
<namePart type="family">Trofimova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="family">Bondarenko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Panchenko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vasily</namePart>
<namePart type="family">Konovalov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maksim</namePart>
<namePart type="family">Savkin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Rosenthal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aiala</namePart>
<namePart type="family">Rosá</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debanjan</namePart>
<namePart type="family">Ghosh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-273-2</identifier>
</relatedItem>
<abstract>The DataBench shared task in the SemEval-2025 competition aims to tackle the problem of QA from data in tables. Given the diversity of the structure of tables, there are different approaches to retrieving the answer. Although Retrieval-Augmented Generation (RAG) is a viable solution, extracting relevant information from tables remains challenging. In addition, the table can be prohibitively large for direct integration into the LLM context. In this paper, we address QA over tabular data first by identifying relevant columns that might contain the answers, then the LLM generates answers by providing the context of the relevant columns, and finally, the LLM refines its answers. This approach secured us 7th place in the DataBench lite category.</abstract>
<identifier type="citekey">antropova-etal-2025-tabaqa</identifier>
<location>
<url>https://aclanthology.org/2025.semeval-1.126/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>937</start>
<end>952</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T TabaQA at SemEval-2025 Task 8: Column Augmented Generation for Question Answering over Tabular Data
%A Antropova, Ekaterina
%A Kratkov, Egor
%A Derunets, Roman
%A Trofimova, Margarita
%A Bondarenko, Ivan
%A Panchenko, Alexander
%A Konovalov, Vasily
%A Savkin, Maksim
%Y Rosenthal, Sara
%Y Rosá, Aiala
%Y Ghosh, Debanjan
%Y Zampieri, Marcos
%S Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-273-2
%F antropova-etal-2025-tabaqa
%X The DataBench shared task in the SemEval-2025 competition aims to tackle the problem of QA from data in tables. Given the diversity of the structure of tables, there are different approaches to retrieving the answer. Although Retrieval-Augmented Generation (RAG) is a viable solution, extracting relevant information from tables remains challenging. In addition, the table can be prohibitively large for direct integration into the LLM context. In this paper, we address QA over tabular data first by identifying relevant columns that might contain the answers, then the LLM generates answers by providing the context of the relevant columns, and finally, the LLM refines its answers. This approach secured us 7th place in the DataBench lite category.
%U https://aclanthology.org/2025.semeval-1.126/
%P 937-952
Markdown (Informal)
[TabaQA at SemEval-2025 Task 8: Column Augmented Generation for Question Answering over Tabular Data](https://aclanthology.org/2025.semeval-1.126/) (Antropova et al., SemEval 2025)
ACL
- Ekaterina Antropova, Egor Kratkov, Roman Derunets, Margarita Trofimova, Ivan Bondarenko, Alexander Panchenko, Vasily Konovalov, and Maksim Savkin. 2025. TabaQA at SemEval-2025 Task 8: Column Augmented Generation for Question Answering over Tabular Data. In Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025), pages 937–952, Vienna, Austria. Association for Computational Linguistics.