@inproceedings{zhao-etal-2023-investigating,
title = "Investigating Table-to-Text Generation Capabilities of Large Language Models in Real-World Information Seeking Scenarios",
author = "Zhao, Yilun and
Zhang, Haowei and
Si, Shengyun and
Nan, Linyong and
Tang, Xiangru and
Cohan, Arman",
editor = "Wang, Mingxuan and
Zitouni, Imed",
booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: Industry Track",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.emnlp-industry.17",
doi = "10.18653/v1/2023.emnlp-industry.17",
pages = "160--175",
abstract = "Tabular data is prevalent across various industries, necessitating significant time and effort for users to understand and manipulate for their information-seeking purposes. The advancements in large language models (LLMs) have shown enormous potential to improve user efficiency. However, the adoption of LLMs in real-world applications for table information seeking remains underexplored. In this paper, we investigate the table-to-text capabilities of different LLMs using four datasets within two real-world information seeking scenarios. These include the LogicNLG and our newly-constructed LoTNLG datasets for data insight generation, along with the FeTaQA and our newly-constructed F2WTQ datasets for query-based generation. We structure our investigation around three research questions, evaluating the performance of LLMs in table-to-text generation, automated evaluation, and feedback generation, respectively. Experimental results indicate that the current high-performing LLM, specifically GPT-4, can effectively serve as a table-to-text generator, evaluator, and feedback generator, facilitating users{'} information seeking purposes in real-world scenarios. However, a significant performance gap still exists between other open-sourced LLMs (e.g., Vicuna and LLaMA-2) and GPT-4 models. Our data and code are publicly available at https://github.com/yale-nlp/LLM-T2T.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhao-etal-2023-investigating">
<titleInfo>
<title>Investigating Table-to-Text Generation Capabilities of Large Language Models in Real-World Information Seeking Scenarios</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yilun</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haowei</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shengyun</namePart>
<namePart type="family">Si</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Linyong</namePart>
<namePart type="family">Nan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiangru</namePart>
<namePart type="family">Tang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arman</namePart>
<namePart type="family">Cohan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mingxuan</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Imed</namePart>
<namePart type="family">Zitouni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Tabular data is prevalent across various industries, necessitating significant time and effort for users to understand and manipulate for their information-seeking purposes. The advancements in large language models (LLMs) have shown enormous potential to improve user efficiency. However, the adoption of LLMs in real-world applications for table information seeking remains underexplored. In this paper, we investigate the table-to-text capabilities of different LLMs using four datasets within two real-world information seeking scenarios. These include the LogicNLG and our newly-constructed LoTNLG datasets for data insight generation, along with the FeTaQA and our newly-constructed F2WTQ datasets for query-based generation. We structure our investigation around three research questions, evaluating the performance of LLMs in table-to-text generation, automated evaluation, and feedback generation, respectively. Experimental results indicate that the current high-performing LLM, specifically GPT-4, can effectively serve as a table-to-text generator, evaluator, and feedback generator, facilitating users’ information seeking purposes in real-world scenarios. However, a significant performance gap still exists between other open-sourced LLMs (e.g., Vicuna and LLaMA-2) and GPT-4 models. Our data and code are publicly available at https://github.com/yale-nlp/LLM-T2T.</abstract>
<identifier type="citekey">zhao-etal-2023-investigating</identifier>
<identifier type="doi">10.18653/v1/2023.emnlp-industry.17</identifier>
<location>
<url>https://aclanthology.org/2023.emnlp-industry.17</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>160</start>
<end>175</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Investigating Table-to-Text Generation Capabilities of Large Language Models in Real-World Information Seeking Scenarios
%A Zhao, Yilun
%A Zhang, Haowei
%A Si, Shengyun
%A Nan, Linyong
%A Tang, Xiangru
%A Cohan, Arman
%Y Wang, Mingxuan
%Y Zitouni, Imed
%S Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: Industry Track
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F zhao-etal-2023-investigating
%X Tabular data is prevalent across various industries, necessitating significant time and effort for users to understand and manipulate for their information-seeking purposes. The advancements in large language models (LLMs) have shown enormous potential to improve user efficiency. However, the adoption of LLMs in real-world applications for table information seeking remains underexplored. In this paper, we investigate the table-to-text capabilities of different LLMs using four datasets within two real-world information seeking scenarios. These include the LogicNLG and our newly-constructed LoTNLG datasets for data insight generation, along with the FeTaQA and our newly-constructed F2WTQ datasets for query-based generation. We structure our investigation around three research questions, evaluating the performance of LLMs in table-to-text generation, automated evaluation, and feedback generation, respectively. Experimental results indicate that the current high-performing LLM, specifically GPT-4, can effectively serve as a table-to-text generator, evaluator, and feedback generator, facilitating users’ information seeking purposes in real-world scenarios. However, a significant performance gap still exists between other open-sourced LLMs (e.g., Vicuna and LLaMA-2) and GPT-4 models. Our data and code are publicly available at https://github.com/yale-nlp/LLM-T2T.
%R 10.18653/v1/2023.emnlp-industry.17
%U https://aclanthology.org/2023.emnlp-industry.17
%U https://doi.org/10.18653/v1/2023.emnlp-industry.17
%P 160-175
Markdown (Informal)
[Investigating Table-to-Text Generation Capabilities of Large Language Models in Real-World Information Seeking Scenarios](https://aclanthology.org/2023.emnlp-industry.17) (Zhao et al., EMNLP 2023)
ACL