% CLiC-it 2024 conference paper; the record's landing page is given in the url field.
% Case-sensitive tokens in booktitle are brace-protected so that styles which
% recase titles keep them intact. The title is stored exactly as published.
@inproceedings{magnini-zanoli-2024-understanding,
  title     = {Understanding High-complexity Technical Documents with State-of-Art Models},
  author    = {Magnini, Bernardo and
               Zanoli, Roberto},
  editor    = {Dell'Orletta, Felice and
               Lenci, Alessandro and
               Montemagni, Simonetta and
               Sprugnoli, Rachele},
  booktitle = {Proceedings of the 10th {Italian} Conference on Computational Linguistics ({CLiC-it} 2024)},
  month     = dec,
  year      = {2024},
  address   = {Pisa, Italy},
  publisher = {CEUR Workshop Proceedings},
  url       = {https://aclanthology.org/2024.clicit-1.64/},
  pages     = {540--547},
  isbn      = {979-12-210-7060-6},
  abstract  = {Technical documents, particularly those in civil engineering, contain crucial information that supports critical decision-making in construction, transportation and infrastructure projects. Large language models (LLMs) offer a promising solution for automating the extraction and comprehension of technical documents, potentially transforming our interaction with technical information. However, LLMs may encounter significant challenges when processing technical documents due to their complex structure, specialized terminology and reliance on graphical and visual elements. Moreover, LLMs are known to sometimes produce unexpected or incorrect analyses, a phenomenon referred to as hallucination. This study explores the potential of state-of-the-art LLMs, specifically GPT-4omni, to automate the comprehension of technical documents. The evaluation was performed on two types of PDF documents. The first type is selectable text PDFs, which are extractable and editable, focusing on civil engineering documents from the Italian state railways. The second type is scanned OCR PDFs, where text is derived from scanning or OCR, specifically focusing on the design of an outdoor swimming pool. These documents include textual and visual elements such as tables, figures and photos. Our findings suggest that GPT-4omni has a high potential for real-world use, although it may still be susceptible to producing misleading information.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="magnini-zanoli-2024-understanding">
<titleInfo>
<title>Understanding High-complexity Technical Documents with State-of-Art Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bernardo</namePart>
<namePart type="family">Magnini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roberto</namePart>
<namePart type="family">Zanoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Felice</namePart>
<namePart type="family">Dell’Orletta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simonetta</namePart>
<namePart type="family">Montemagni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rachele</namePart>
<namePart type="family">Sprugnoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>CEUR Workshop Proceedings</publisher>
<place>
<placeTerm type="text">Pisa, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-12-210-7060-6</identifier>
</relatedItem>
<abstract>Technical documents, particularly those in civil engineering, contain crucial information that supports critical decision-making in construction, transportation and infrastructure projects. Large language models (LLMs) offer a promising solution for automating the extraction and comprehension of technical documents, potentially transforming our interaction with technical information. However, LLMs may encounter significant challenges when processing technical documents due to their complex structure, specialized terminology and reliance on graphical and visual elements. Moreover, LLMs are known to sometimes produce unexpected or incorrect analyses, a phenomenon referred to as hallucination. This study explores the potential of state-of-the-art LLMs, specifically GPT-4omni, to automate the comprehension of technical documents. The evaluation was performed on two types of PDF documents. The first type is selectable text PDFs, which are extractable and editable, focusing on civil engineering documents from the Italian state railways. The second type is scanned OCR PDFs, where text is derived from scanning or OCR, specifically focusing on the design of an outdoor swimming pool. These documents include textual and visual elements such as tables, figures and photos. Our findings suggest that GPT-4omni has a high potential for real-world use, although it may still be susceptible to producing misleading information.</abstract>
<identifier type="citekey">magnini-zanoli-2024-understanding</identifier>
<location>
<url>https://aclanthology.org/2024.clicit-1.64/</url>
</location>
<part>
<date>2024-12</date>
<extent unit="page">
<start>540</start>
<end>547</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Understanding High-complexity Technical Documents with State-of-Art Models
%A Magnini, Bernardo
%A Zanoli, Roberto
%Y Dell’Orletta, Felice
%Y Lenci, Alessandro
%Y Montemagni, Simonetta
%Y Sprugnoli, Rachele
%S Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)
%D 2024
%8 December
%I CEUR Workshop Proceedings
%C Pisa, Italy
%@ 979-12-210-7060-6
%F magnini-zanoli-2024-understanding
%X Technical documents, particularly those in civil engineering, contain crucial information that supports critical decision-making in construction, transportation and infrastructure projects. Large language models (LLMs) offer a promising solution for automating the extraction and comprehension of technical documents, potentially transforming our interaction with technical information. However, LLMs may encounter significant challenges when processing technical documents due to their complex structure, specialized terminology and reliance on graphical and visual elements. Moreover, LLMs are known to sometimes produce unexpected or incorrect analyses, a phenomenon referred to as hallucination. This study explores the potential of state-of-the-art LLMs, specifically GPT-4omni, to automate the comprehension of technical documents. The evaluation was performed on two types of PDF documents. The first type is selectable text PDFs, which are extractable and editable, focusing on civil engineering documents from the Italian state railways. The second type is scanned OCR PDFs, where text is derived from scanning or OCR, specifically focusing on the design of an outdoor swimming pool. These documents include textual and visual elements such as tables, figures and photos. Our findings suggest that GPT-4omni has a high potential for real-world use, although it may still be susceptible to producing misleading information.
%U https://aclanthology.org/2024.clicit-1.64/
%P 540-547
Markdown (Informal)
[Understanding High-complexity Technical Documents with State-of-Art Models](https://aclanthology.org/2024.clicit-1.64/) (Magnini & Zanoli, CLiC-it 2024)
ACL