@inproceedings{oliverio-etal-2024-dipinfo,
title = "{D}ip{I}nfo-{U}ni{T}o at the {GEM}{'}24 Data-to-Text Task: Augmenting {LLM}s with the Split-Generate-Aggregate Pipeline",
author = "Oliverio, Michael and
Balestrucci, Pier Felice and
Mazzei, Alessandro and
Basile, Valerio",
editor = "Mille, Simon and
Clinciu, Miruna-Adriana",
booktitle = "Proceedings of the 17th International Natural Language Generation Conference: Generation Challenges",
month = sep,
year = "2024",
address = "Tokyo, Japan",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.inlg-genchal.6",
pages = "59--65",
abstract = "This paper describes the DipInfo-UniTo system participating in the GEM shared task 2024. We participate only in the Data-to-Text (D2T) task. The DipInfo-UniTo system is based on Mistral (Jiang et al., 2023), a recent Large Language Model (LLM). Most LLMs are capable of generating high-quality text for D2T tasks but, crucially, they often fall short in terms of adequacy, and sometimes exhibit {``}hallucinations{''}. To mitigate this issue, we have implemented a generation pipeline that combines LLMs with techniques from the traditional Natural Language Generation (NLG) pipeline. In particular, we have a three-step process, SGA, consisting of (1) Splitting the original set of triples, (2) Generating verbalizations from the resulting split data units, (3) Aggregating the verbalizations produced in the previous step.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="oliverio-etal-2024-dipinfo">
<titleInfo>
<title>DipInfo-UniTo at the GEM’24 Data-to-Text Task: Augmenting LLMs with the Split-Generate-Aggregate Pipeline</title>
</titleInfo>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Oliverio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pier</namePart>
<namePart type="given">Felice</namePart>
<namePart type="family">Balestrucci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Mazzei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Valerio</namePart>
<namePart type="family">Basile</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Natural Language Generation Conference: Generation Challenges</title>
</titleInfo>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Mille</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Miruna-Adriana</namePart>
<namePart type="family">Clinciu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Tokyo, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes the DipInfo-UniTo system participating in the GEM shared task 2024. We participate only in the Data-to-Text (D2T) task. The DipInfo-UniTo system is based on Mistral (Jiang et al., 2023), a recent Large Language Model (LLM). Most LLMs are capable of generating high-quality text for D2T tasks but, crucially, they often fall short in terms of adequacy, and sometimes exhibit “hallucinations”. To mitigate this issue, we have implemented a generation pipeline that combines LLMs with techniques from the traditional Natural Language Generation (NLG) pipeline. In particular, we have a three-step process, SGA, consisting of (1) Splitting the original set of triples, (2) Generating verbalizations from the resulting split data units, (3) Aggregating the verbalizations produced in the previous step.</abstract>
<identifier type="citekey">oliverio-etal-2024-dipinfo</identifier>
<location>
<url>https://aclanthology.org/2024.inlg-genchal.6</url>
</location>
<part>
<date>2024-09</date>
<extent unit="page">
<start>59</start>
<end>65</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DipInfo-UniTo at the GEM’24 Data-to-Text Task: Augmenting LLMs with the Split-Generate-Aggregate Pipeline
%A Oliverio, Michael
%A Balestrucci, Pier Felice
%A Mazzei, Alessandro
%A Basile, Valerio
%Y Mille, Simon
%Y Clinciu, Miruna-Adriana
%S Proceedings of the 17th International Natural Language Generation Conference: Generation Challenges
%D 2024
%8 September
%I Association for Computational Linguistics
%C Tokyo, Japan
%F oliverio-etal-2024-dipinfo
%X This paper describes the DipInfo-UniTo system participating in the GEM shared task 2024. We participate only in the Data-to-Text (D2T) task. The DipInfo-UniTo system is based on Mistral (Jiang et al., 2023), a recent Large Language Model (LLM). Most LLMs are capable of generating high-quality text for D2T tasks but, crucially, they often fall short in terms of adequacy, and sometimes exhibit “hallucinations”. To mitigate this issue, we have implemented a generation pipeline that combines LLMs with techniques from the traditional Natural Language Generation (NLG) pipeline. In particular, we have a three-step process, SGA, consisting of (1) Splitting the original set of triples, (2) Generating verbalizations from the resulting split data units, (3) Aggregating the verbalizations produced in the previous step.
%U https://aclanthology.org/2024.inlg-genchal.6
%P 59-65
Markdown (Informal)
[DipInfo-UniTo at the GEM’24 Data-to-Text Task: Augmenting LLMs with the Split-Generate-Aggregate Pipeline](https://aclanthology.org/2024.inlg-genchal.6) (Oliverio et al., INLG 2024)
ACL