% Conference paper: CUNI-NL submission to the IWSLT 2025 Offline Speech
% Translation and Instruction Following tasks.
% Braced words in the title keep their capitalisation under styles that recase titles.
@inproceedings{luu-bojar-2025-cuni,
  title     = "{CUNI}-{NL}@{IWSLT} 2025: End-to-end Offline Speech Translation and Instruction Following with {LLM}s",
  author    = "Luu, Nam and
               Bojar, Ond{\v{r}}ej",
  editor    = "Salesky, Elizabeth and
               Federico, Marcello and
               Anastasopoulos, Antonis",
  booktitle = "Proceedings of the 22nd International Conference on Spoken Language Translation (IWSLT 2025)",
  month     = jul,
  year      = "2025",
  address   = "Vienna, Austria (in-person and online)",
  publisher = "Association for Computational Linguistics",
  url       = "https://aclanthology.org/2025.iwslt-1.28/",
  doi       = "10.18653/v1/2025.iwslt-1.28",
  pages     = "282--288",
  isbn      = "979-8-89176-272-5",
  abstract  = "This paper describes the CUNI-NL team{'}s submission to the IWSLT 2025 Offline Speech Translation and Instruction Following tasks, focusing on transcribing the English audio, and translating the English audio to German text. Our systems follow the end-to-end approach, where each system consists of a pretrained, frozen speech encoder, along with a medium-sized large language model fine-tuned with LoRA on three tasks: 1) transcribing the English audio; 2) directly translating the English audio to German text; and 3) a combination of the above two tasks, i.e. simultaneously transcribing the English audio and translating the English audio to German text.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for the paper with citekey luu-bojar-2025-cuni. -->
  <mods ID="luu-bojar-2025-cuni">
    <titleInfo>
      <title>CUNI-NL@IWSLT 2025: End-to-end Offline Speech Translation and Instruction Following with LLMs</title>
    </titleInfo>
    <!-- Paper authors. -->
    <name type="personal">
      <namePart type="given">Nam</namePart>
      <namePart type="family">Luu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ondřej</namePart>
      <namePart type="family">Bojar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 22nd International Conference on Spoken Language Translation (IWSLT 2025)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Elizabeth</namePart>
        <namePart type="family">Salesky</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marcello</namePart>
        <namePart type="family">Federico</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Antonis</namePart>
        <namePart type="family">Anastasopoulos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria (in-person and online)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-272-5</identifier>
    </relatedItem>
    <abstract>This paper describes the CUNI-NL team’s submission to the IWSLT 2025 Offline Speech Translation and Instruction Following tasks, focusing on transcribing the English audio, and translating the English audio to German text. Our systems follow the end-to-end approach, where each system consists of a pretrained, frozen speech encoder, along with a medium-sized large language model fine-tuned with LoRA on three tasks: 1) transcribing the English audio; 2) directly translating the English audio to German text; and 3) a combination of the above two tasks, i.e. simultaneously transcribing the English audio and translating the English audio to German text.</abstract>
    <!-- Identifiers and landing page for the paper itself. -->
    <identifier type="citekey">luu-bojar-2025-cuni</identifier>
    <identifier type="doi">10.18653/v1/2025.iwslt-1.28</identifier>
    <location>
      <url>https://aclanthology.org/2025.iwslt-1.28/</url>
    </location>
    <!-- Date and page extent of the paper within the host volume. -->
    <part>
      <date>2025-07</date>
      <extent unit="page">
        <start>282</start>
        <end>288</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T CUNI-NL@IWSLT 2025: End-to-end Offline Speech Translation and Instruction Following with LLMs
%A Luu, Nam
%A Bojar, Ondřej
%Y Salesky, Elizabeth
%Y Federico, Marcello
%Y Anastasopoulos, Antonis
%S Proceedings of the 22nd International Conference on Spoken Language Translation (IWSLT 2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria (in-person and online)
%@ 979-8-89176-272-5
%F luu-bojar-2025-cuni
%X This paper describes the CUNI-NL team’s submission to the IWSLT 2025 Offline Speech Translation and Instruction Following tasks, focusing on transcribing the English audio, and translating the English audio to German text. Our systems follow the end-to-end approach, where each system consists of a pretrained, frozen speech encoder, along with a medium-sized large language model fine-tuned with LoRA on three tasks: 1) transcribing the English audio; 2) directly translating the English audio to German text; and 3) a combination of the above two tasks, i.e. simultaneously transcribing the English audio and translating the English audio to German text.
%R 10.18653/v1/2025.iwslt-1.28
%U https://aclanthology.org/2025.iwslt-1.28/
%U https://doi.org/10.18653/v1/2025.iwslt-1.28
%P 282-288
Markdown (Informal)
[CUNI-NL@IWSLT 2025: End-to-end Offline Speech Translation and Instruction Following with LLMs](https://aclanthology.org/2025.iwslt-1.28/) (Luu & Bojar, IWSLT 2025)
ACL