% Conference paper from the INLG 2025 proceedings (pages 810--822).
% Non-ASCII characters in names are written directly in UTF-8, so this entry
% should be processed with Biber or another UTF-8-aware BibTeX implementation.
@inproceedings{osuji-etal-2025-scaling,
  title     = {Scaling Up Data-to-Text Generation to Longer Sequences: A New Dataset and Benchmark Results for Generation from Large Triple Sets},
  author    = {Osuji, Chinonso Cynthia and
               Mille, Simon and
               O{'}Connell, Ornait and
               Castro Ferreira, Thiago and
               Belz, Anya and
               Davis, Brian},
  editor    = {Flek, Lucie and
               Narayan, Shashi and
               Phương, Lê Hồng and
               Pei, Jiahuan},
  booktitle = {Proceedings of the 18th International Natural Language Generation Conference},
  month     = oct,
  year      = {2025},
  address   = {Hanoi, Vietnam},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.inlg-main.47/},
  pages     = {810--822},
  abstract  = {The ability of LLMs to write coherent, faithful long texts from structured data inputs remains relatively uncharted, in part because nearly all public data-to-text datasets contain only short input-output pairs. To address these gaps, we benchmark six LLMs, a rule-based system and human-written texts on a new long-input dataset in English and Irish via LLM-based evaluation. We find substantial differences between models and languages.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="osuji-etal-2025-scaling">
<titleInfo>
<title>Scaling Up Data-to-Text Generation to Longer Sequences: A New Dataset and Benchmark Results for Generation from Large Triple Sets</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chinonso</namePart>
<namePart type="given">Cynthia</namePart>
<namePart type="family">Osuji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Mille</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ornait</namePart>
<namePart type="family">O’Connell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thiago</namePart>
<namePart type="family">Castro Ferreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anya</namePart>
<namePart type="family">Belz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brian</namePart>
<namePart type="family">Davis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th International Natural Language Generation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lucie</namePart>
<namePart type="family">Flek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shashi</namePart>
<namePart type="family">Narayan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lê</namePart>
<namePart type="given">Hồng</namePart>
<namePart type="family">Phương</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiahuan</namePart>
<namePart type="family">Pei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hanoi, Vietnam</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The ability of LLMs to write coherent, faithful long texts from structured data inputs remains relatively uncharted, in part because nearly all public data-to-text datasets contain only short input-output pairs. To address these gaps, we benchmark six LLMs, a rule-based system and human-written texts on a new long-input dataset in English and Irish via LLM-based evaluation. We find substantial differences between models and languages.</abstract>
<identifier type="citekey">osuji-etal-2025-scaling</identifier>
<location>
<url>https://aclanthology.org/2025.inlg-main.47/</url>
</location>
<part>
<date>2025-10</date>
<extent unit="page">
<start>810</start>
<end>822</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Scaling Up Data-to-Text Generation to Longer Sequences: A New Dataset and Benchmark Results for Generation from Large Triple Sets
%A Osuji, Chinonso Cynthia
%A Mille, Simon
%A O’Connell, Ornait
%A Castro Ferreira, Thiago
%A Belz, Anya
%A Davis, Brian
%Y Flek, Lucie
%Y Narayan, Shashi
%Y Phương, Lê Hồng
%Y Pei, Jiahuan
%S Proceedings of the 18th International Natural Language Generation Conference
%D 2025
%8 October
%I Association for Computational Linguistics
%C Hanoi, Vietnam
%F osuji-etal-2025-scaling
%X The ability of LLMs to write coherent, faithful long texts from structured data inputs remains relatively uncharted, in part because nearly all public data-to-text datasets contain only short input-output pairs. To address these gaps, we benchmark six LLMs, a rule-based system and human-written texts on a new long-input dataset in English and Irish via LLM-based evaluation. We find substantial differences between models and languages.
%U https://aclanthology.org/2025.inlg-main.47/
%P 810-822
Markdown (Informal)
[Scaling Up Data-to-Text Generation to Longer Sequences: A New Dataset and Benchmark Results for Generation from Large Triple Sets](https://aclanthology.org/2025.inlg-main.47/) (Osuji et al., INLG 2025)
ACL