@inproceedings{korkmaz-del-rio-chanona-2024-integrating,
title = "Integrating Table Representations into Large Language Models for Improved Scholarly Document Comprehension",
author = "Korkmaz, Buse Sibel and
Del Rio Chanona, Antonio",
editor = "Ghosal, Tirthankar and
Singh, Amanpreet and
Waard, Anita and
Mayr, Philipp and
Naik, Aakanksha and
Weller, Orion and
Lee, Yoonjoo and
Shen, Shannon and
Qin, Yanxia",
booktitle = "Proceedings of the Fourth Workshop on Scholarly Document Processing (SDP 2024)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.sdp-1.28",
pages = "293--306",
abstract = "We address the challenge of interpreting and reasoning over scientific tables with Large Language Models (LLMs), a crucial aspect of scholarly documents. Despite significant progress in natural language processing, the integration of tabular data into scientific LLMs remains limited. We propose an innovative approach leveraging intermediate task pre-training on table question-answering datasets, followed by model adaptation to comprehend tables in computer science literature. Our findings reveal that incorporating table understanding substantially improves the performance of LLMs on scientific literature understanding tasks, which we showcase in peer-review score prediction. This improvement underscores the importance of utilizing tabular data in the training of scientific language models. The code and models are publicly available at [this link](https://github.com/buseskorkmaz/Integrating-Table-Representations-into-LLMs).",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="korkmaz-del-rio-chanona-2024-integrating">
<titleInfo>
<title>Integrating Table Representations into Large Language Models for Improved Scholarly Document Comprehension</title>
</titleInfo>
<name type="personal">
<namePart type="given">Buse</namePart>
<namePart type="given">Sibel</namePart>
<namePart type="family">Korkmaz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonio</namePart>
<namePart type="family">Del Rio Chanona</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on Scholarly Document Processing (SDP 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tirthankar</namePart>
<namePart type="family">Ghosal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amanpreet</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anita</namePart>
<namePart type="family">Waard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Mayr</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aakanksha</namePart>
<namePart type="family">Naik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Orion</namePart>
<namePart type="family">Weller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yoonjoo</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shannon</namePart>
<namePart type="family">Shen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yanxia</namePart>
<namePart type="family">Qin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We address the challenge of interpreting and reasoning over scientific tables with Large Language Models (LLMs), a crucial aspect of scholarly documents. Despite significant progress in natural language processing, the integration of tabular data into scientific LLMs remains limited. We propose an innovative approach leveraging intermediate task pre-training on table question-answering datasets, followed by model adaptation to comprehend tables in computer science literature. Our findings reveal that incorporating table understanding substantially improves the performance of LLMs on scientific literature understanding tasks, which we showcase in peer-review score prediction. This improvement underscores the importance of utilizing tabular data in the training of scientific language models. The code and models are publicly available at [this link](https://github.com/buseskorkmaz/Integrating-Table-Representations-into-LLMs).</abstract>
<identifier type="citekey">korkmaz-del-rio-chanona-2024-integrating</identifier>
<location>
<url>https://aclanthology.org/2024.sdp-1.28</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>293</start>
<end>306</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Integrating Table Representations into Large Language Models for Improved Scholarly Document Comprehension
%A Korkmaz, Buse Sibel
%A Del Rio Chanona, Antonio
%Y Ghosal, Tirthankar
%Y Singh, Amanpreet
%Y Waard, Anita
%Y Mayr, Philipp
%Y Naik, Aakanksha
%Y Weller, Orion
%Y Lee, Yoonjoo
%Y Shen, Shannon
%Y Qin, Yanxia
%S Proceedings of the Fourth Workshop on Scholarly Document Processing (SDP 2024)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F korkmaz-del-rio-chanona-2024-integrating
%X We address the challenge of interpreting and reasoning over scientific tables with Large Language Models (LLMs), a crucial aspect of scholarly documents. Despite significant progress in natural language processing, the integration of tabular data into scientific LLMs remains limited. We propose an innovative approach leveraging intermediate task pre-training on table question-answering datasets, followed by model adaptation to comprehend tables in computer science literature. Our findings reveal that incorporating table understanding substantially improves the performance of LLMs on scientific literature understanding tasks, which we showcase in peer-review score prediction. This improvement underscores the importance of utilizing tabular data in the training of scientific language models. The code and models are publicly available at [this link](https://github.com/buseskorkmaz/Integrating-Table-Representations-into-LLMs).
%U https://aclanthology.org/2024.sdp-1.28
%P 293-306
Markdown (Informal)
[Integrating Table Representations into Large Language Models for Improved Scholarly Document Comprehension](https://aclanthology.org/2024.sdp-1.28) (Korkmaz & Del Rio Chanona, sdp-WS 2024)
ACL