% Conference paper record for "TST" (ACL 2025, Volume 1: Long Papers).
% Canonical landing page and DOI are given in the url/doi fields below.
% Lines outside the entry braces are ignored by BibTeX and serve as comments.
@inproceedings{jiang-etal-2025-tst,
    title = "{TST}: A Schema-Based Top-Down and Dynamic-Aware Agent of Text-to-Table Tasks",
    author = "Jiang, Peiwen and
      Jiang, Haitong and
      Ma, Ruhui and
      Chen, Yvonne Jie and
      Cheng, Jinhua",
    editor = "Che, Wanxiang and
      Nabende, Joyce and
      Shutova, Ekaterina and
      Pilehvar, Mohammad Taher",
    booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
    month = jul,
    year = "2025",
    address = "Vienna, Austria",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.acl-long.829/",
    doi = "10.18653/v1/2025.acl-long.829",
    pages = "16951--16966",
    isbn = "979-8-89176-251-0",
    abstract = "As a bridge between natural texts and information systems like structured storage, statistical analysis, retrieving, and recommendation, the text-to-table task has received widespread attention recently. Existing researches have gone through a paradigm shift from traditional bottom-up IE (Information Extraction) to top-down LLMs-based question answering with RAG (Retrieval-Augmented Generation). Furthermore, these methods mainly adopt end-to-end models or use multi-stage pipelines to extract text content based on static table structures. However, they neglect to deal with precise inner-document evidence extraction and dynamic information such as multiple entities and events, which can not be defined in static table head format and are very common in natural texts. To address this issue, we propose a two-stage dynamic content extraction agent framework called TST (Text-Schema-Table), which uses type recognition methods to extract context evidences with the conduction of domain schema sequentially. Based on the evidence, firstly we quantify the total instances of each dynamic object and then extract them with ordered numerical prompts. Through extensive comparisons with existing methods across different datasets, our extraction framework exhibits state-of-the-art (SOTA) performance. Our codes are available at \url{https://github.com/jiangpw41/TST}."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jiang-etal-2025-tst">
<titleInfo>
<title>TST: A Schema-Based Top-Down and Dynamic-Aware Agent of Text-to-Table Tasks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Peiwen</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haitong</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruhui</namePart>
<namePart type="family">Ma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yvonne</namePart>
<namePart type="given">Jie</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jinhua</namePart>
<namePart type="family">Cheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-251-0</identifier>
</relatedItem>
<abstract>As a bridge between natural texts and information systems like structured storage, statistical analysis, retrieving, and recommendation, the text-to-table task has received widespread attention recently. Existing researches have gone through a paradigm shift from traditional bottom-up IE (Information Extraction) to top-down LLMs-based question answering with RAG (Retrieval-Augmented Generation). Furthermore, these methods mainly adopt end-to-end models or use multi-stage pipelines to extract text content based on static table structures. However, they neglect to deal with precise inner-document evidence extraction and dynamic information such as multiple entities and events, which can not be defined in static table head format and are very common in natural texts. To address this issue, we propose a two-stage dynamic content extraction agent framework called TST (Text-Schema-Table), which uses type recognition methods to extract context evidences with the conduction of domain schema sequentially. Based on the evidence, firstly we quantify the total instances of each dynamic object and then extract them with ordered numerical prompts. Through extensive comparisons with existing methods across different datasets, our extraction framework exhibits state-of-the-art (SOTA) performance. Our codes are available at https://github.com/jiangpw41/TST.</abstract>
<identifier type="citekey">jiang-etal-2025-tst</identifier>
<identifier type="doi">10.18653/v1/2025.acl-long.829</identifier>
<location>
<url>https://aclanthology.org/2025.acl-long.829/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>16951</start>
<end>16966</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T TST: A Schema-Based Top-Down and Dynamic-Aware Agent of Text-to-Table Tasks
%A Jiang, Peiwen
%A Jiang, Haitong
%A Ma, Ruhui
%A Chen, Yvonne Jie
%A Cheng, Jinhua
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F jiang-etal-2025-tst
%X As a bridge between natural texts and information systems like structured storage, statistical analysis, retrieving, and recommendation, the text-to-table task has received widespread attention recently. Existing researches have gone through a paradigm shift from traditional bottom-up IE (Information Extraction) to top-down LLMs-based question answering with RAG (Retrieval-Augmented Generation). Furthermore, these methods mainly adopt end-to-end models or use multi-stage pipelines to extract text content based on static table structures. However, they neglect to deal with precise inner-document evidence extraction and dynamic information such as multiple entities and events, which can not be defined in static table head format and are very common in natural texts. To address this issue, we propose a two-stage dynamic content extraction agent framework called TST (Text-Schema-Table), which uses type recognition methods to extract context evidences with the conduction of domain schema sequentially. Based on the evidence, firstly we quantify the total instances of each dynamic object and then extract them with ordered numerical prompts. Through extensive comparisons with existing methods across different datasets, our extraction framework exhibits state-of-the-art (SOTA) performance. Our codes are available at https://github.com/jiangpw41/TST.
%R 10.18653/v1/2025.acl-long.829
%U https://aclanthology.org/2025.acl-long.829/
%U https://doi.org/10.18653/v1/2025.acl-long.829
%P 16951-16966
Markdown (Informal)
[TST: A Schema-Based Top-Down and Dynamic-Aware Agent of Text-to-Table Tasks](https://aclanthology.org/2025.acl-long.829/) (Jiang et al., ACL 2025)
ACL