@inproceedings{staiano-etal-2025-italert,
title = "{ITALERT}: Assessing the Quality of {LLM}s and {NMT} in Translating {I}talian Emergency Response Text",
author = "Staiano, Maria Carmen and
Han, Lifeng and
Monti, Johanna and
Chiusaroli, Francesca",
editor = "Bouillon, Pierrette and
Gerlach, Johanna and
Girletti, Sabrina and
Volkart, Lise and
Rubino, Raphael and
Sennrich, Rico and
Farinha, Ana C. and
Gaido, Marco and
Daems, Joke and
Kenny, Dorothy and
Moniz, Helena and
Szoc, Sara",
booktitle = "Proceedings of Machine Translation Summit XX: Volume 1",
month = jun,
year = "2025",
address = "Geneva, Switzerland",
publisher = "European Association for Machine Translation",
url = "https://aclanthology.org/2025.mtsummit-1.43/",
pages = "566--577",
ISBN = "978-2-9701897-0-1",
abstract = "This paper presents the outcomes of an initial investigation into the performance of Large Language Models (LLMs) and Neural Machine Translation (NMT) systems in translating high-stakes messages. The research employed a novel bilingual corpus, ITALERT (Italian Emergency Response Text) and applied a human-centric post-editing based metric (HOPE) to assess translation quality systematically. The initial dataset contains eleven texts in Italian and their corresponding English translations, both extracted from the national communication campaign website of the Italian Civil Protection Department. The texts deal with eight crisis scenarios: flooding, earthquake, forest fire, volcanic eruption, tsunami, industrial accident, nuclear risk, and dam failure. The dataset has been carefully compiled to ensure usability and clarity for evaluating machine translation (MT) systems in crisis settings. Our findings show that current LLMs and NMT models, such as ChatGPT (OpenAI{'}s GPT-4o model) and Google MT, face limitations in translating emergency texts, particularly in maintaining the appropriate register, resolving context ambiguities, and managing domain-specific terminology."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="staiano-etal-2025-italert">
<titleInfo>
<title>ITALERT: Assessing the Quality of LLMs and NMT in Translating Italian Emergency Response Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="given">Carmen</namePart>
<namePart type="family">Staiano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lifeng</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Johanna</namePart>
<namePart type="family">Monti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francesca</namePart>
<namePart type="family">Chiusaroli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of Machine Translation Summit XX: Volume 1</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pierrette</namePart>
<namePart type="family">Bouillon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Johanna</namePart>
<namePart type="family">Gerlach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sabrina</namePart>
<namePart type="family">Girletti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lise</namePart>
<namePart type="family">Volkart</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Raphael</namePart>
<namePart type="family">Rubino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rico</namePart>
<namePart type="family">Sennrich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ana</namePart>
<namePart type="given">C</namePart>
<namePart type="family">Farinha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Gaido</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joke</namePart>
<namePart type="family">Daems</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dorothy</namePart>
<namePart type="family">Kenny</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helena</namePart>
<namePart type="family">Moniz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Szoc</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Association for Machine Translation</publisher>
<place>
<placeTerm type="text">Geneva, Switzerland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">978-2-9701897-0-1</identifier>
</relatedItem>
<abstract>This paper presents the outcomes of an initial investigation into the performance of Large Language Models (LLMs) and Neural Machine Translation (NMT) systems in translating high-stakes messages. The research employed a novel bilingual corpus, ITALERT (Italian Emergency Response Text) and applied a human-centric post-editing based metric (HOPE) to assess translation quality systematically. The initial dataset contains eleven texts in Italian and their corresponding English translations, both extracted from the national communication campaign website of the Italian Civil Protection Department. The texts deal with eight crisis scenarios: flooding, earthquake, forest fire, volcanic eruption, tsunami, industrial accident, nuclear risk, and dam failure. The dataset has been carefully compiled to ensure usability and clarity for evaluating machine translation (MT) systems in crisis settings. Our findings show that current LLMs and NMT models, such as ChatGPT (OpenAI’s GPT-4o model) and Google MT, face limitations in translating emergency texts, particularly in maintaining the appropriate register, resolving context ambiguities, and managing domain-specific terminology.</abstract>
<identifier type="citekey">staiano-etal-2025-italert</identifier>
<location>
<url>https://aclanthology.org/2025.mtsummit-1.43/</url>
</location>
<part>
<date>2025-06</date>
<extent unit="page">
<start>566</start>
<end>577</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ITALERT: Assessing the Quality of LLMs and NMT in Translating Italian Emergency Response Text
%A Staiano, Maria Carmen
%A Han, Lifeng
%A Monti, Johanna
%A Chiusaroli, Francesca
%Y Bouillon, Pierrette
%Y Gerlach, Johanna
%Y Girletti, Sabrina
%Y Volkart, Lise
%Y Rubino, Raphael
%Y Sennrich, Rico
%Y Farinha, Ana C.
%Y Gaido, Marco
%Y Daems, Joke
%Y Kenny, Dorothy
%Y Moniz, Helena
%Y Szoc, Sara
%S Proceedings of Machine Translation Summit XX: Volume 1
%D 2025
%8 June
%I European Association for Machine Translation
%C Geneva, Switzerland
%@ 978-2-9701897-0-1
%F staiano-etal-2025-italert
%X This paper presents the outcomes of an initial investigation into the performance of Large Language Models (LLMs) and Neural Machine Translation (NMT) systems in translating high-stakes messages. The research employed a novel bilingual corpus, ITALERT (Italian Emergency Response Text) and applied a human-centric post-editing based metric (HOPE) to assess translation quality systematically. The initial dataset contains eleven texts in Italian and their corresponding English translations, both extracted from the national communication campaign website of the Italian Civil Protection Department. The texts deal with eight crisis scenarios: flooding, earthquake, forest fire, volcanic eruption, tsunami, industrial accident, nuclear risk, and dam failure. The dataset has been carefully compiled to ensure usability and clarity for evaluating machine translation (MT) systems in crisis settings. Our findings show that current LLMs and NMT models, such as ChatGPT (OpenAI’s GPT-4o model) and Google MT, face limitations in translating emergency texts, particularly in maintaining the appropriate register, resolving context ambiguities, and managing domain-specific terminology.
%U https://aclanthology.org/2025.mtsummit-1.43/
%P 566-577
Markdown (Informal)
[ITALERT: Assessing the Quality of LLMs and NMT in Translating Italian Emergency Response Text](https://aclanthology.org/2025.mtsummit-1.43/) (Staiano et al., MTSummit 2025)
ACL