% BibTeX record for the paper published at https://aclanthology.org/2026.iwslt-1.19/
% Acronyms in the title are brace-protected so sentence-casing styles keep them uppercase.
@inproceedings{pareras-etal-2026-bscs,
    title     = "{BSC}{'}s Submission to the Instruction Following Track of {IWSLT} 2026",
    author    = "Pareras, Oriol and
                 Llado, Joan and
                 Buitrago, Pol and
                 Casals-Salvador, Marc and
                 Costa, Federico and
                 Espana-Bonet, Cristina",
    editor    = "Salesky, Elizabeth and
                 Anastasopoulos, Antonios and
                 Negri, Matteo and
                 Federico, Marcello",
    booktitle = "Proceedings of the 23rd International Conference on Spoken Language Translation ({IWSLT} 2026)",
    month     = jul,
    year      = "2026",
    address   = "San Diego, USA (in-person and online)",
    publisher = "Association for Computational Linguistics",
    url       = "https://aclanthology.org/2026.iwslt-1.19/",
    pages     = "171--182",
    isbn      = "979-8-89176-411-8",
    abstract  = "We present the Barcelona Supercomputing Center (BSC) submission to the Instruction Following (IF) track of IWSLT 2026, which evaluates unified spoken language systems capable of solving multiple tasks through natural language instructions. Our system consists of an end-to-end (E2E) architecture that combines a speech encoder with a translation-oriented Large Language Model. The model is trained on speech and text data, covering automatic speech recognition, translation, question answering, and instruction following. We investigate a Chain-of-Thought (CoT) generation strategy that explicitly decomposes tasks by producing an intermediate transcription before the final output, which enables effective reuse of text-only supervision and improves robustness across tasks. To further support generalization, we design diverse prompt formulations and align text-only and speech inputs under a shared inference pattern. Results on IWSLT 2025 evaluation data show that our approach achieves competitive and even state-of-the-art performance across tasks.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS v3 record for the paper with citekey pareras-etal-2026-bscs. -->
  <mods ID="pareras-etal-2026-bscs">
    <titleInfo>
      <title>BSC’s Submission to the Instruction Following Track of IWSLT 2026</title>
    </titleInfo>
    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Oriol</namePart>
      <namePart type="family">Pareras</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Joan</namePart>
      <namePart type="family">Llado</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pol</namePart>
      <namePart type="family">Buitrago</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Marc</namePart>
      <namePart type="family">Casals-Salvador</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Federico</namePart>
      <namePart type="family">Costa</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Cristina</namePart>
      <namePart type="family">Espana-Bonet</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 23rd International Conference on Spoken Language Translation (IWSLT 2026)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Elizabeth</namePart>
        <namePart type="family">Salesky</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Antonios</namePart>
        <namePart type="family">Anastasopoulos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Matteo</namePart>
        <namePart type="family">Negri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marcello</namePart>
        <namePart type="family">Federico</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, USA (in-person and online)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-411-8</identifier>
    </relatedItem>
    <abstract>We present the Barcelona Supercomputing Center (BSC) submission to the Instruction Following (IF) track of IWSLT 2026, which evaluates unified spoken language systems capable of solving multiple tasks through natural language instructions. Our system consists of an end-to-end (E2E) architecture that combines a speech encoder with a translation-oriented Large Language Model. The model is trained on speech and text data, covering automatic speech recognition, translation, question answering, and instruction following. We investigate a Chain-of-Thought (CoT) generation strategy that explicitly decomposes tasks by producing an intermediate transcription before the final output, which enables effective reuse of text-only supervision and improves robustness across tasks. To further support generalization, we design diverse prompt formulations and align text-only and speech inputs under a shared inference pattern. Results on IWSLT 2025 evaluation data show that our approach achieves competitive and even state-of-the-art performance across tasks.</abstract>
    <identifier type="citekey">pareras-etal-2026-bscs</identifier>
    <location>
      <url>https://aclanthology.org/2026.iwslt-1.19/</url>
    </location>
    <!-- Position within the host volume: issue date and page range. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>171</start>
        <end>182</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T BSC’s Submission to the Instruction Following Track of IWSLT 2026
%A Pareras, Oriol
%A Llado, Joan
%A Buitrago, Pol
%A Casals-Salvador, Marc
%A Costa, Federico
%A Espana-Bonet, Cristina
%Y Salesky, Elizabeth
%Y Anastasopoulos, Antonios
%Y Negri, Matteo
%Y Federico, Marcello
%S Proceedings of the 23rd International Conference on Spoken Language Translation (IWSLT 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, USA (in-person and online)
%@ 979-8-89176-411-8
%F pareras-etal-2026-bscs
%X We present the Barcelona Supercomputing Center (BSC) submission to the Instruction Following (IF) track of IWSLT 2026, which evaluates unified spoken language systems capable of solving multiple tasks through natural language instructions. Our system consists of an end-to-end (E2E) architecture that combines a speech encoder with a translation-oriented Large Language Model. The model is trained on speech and text data, covering automatic speech recognition, translation, question answering, and instruction following. We investigate a Chain-of-Thought (CoT) generation strategy that explicitly decomposes tasks by producing an intermediate transcription before the final output, which enables effective reuse of text-only supervision and improves robustness across tasks. To further support generalization, we design diverse prompt formulations and align text-only and speech inputs under a shared inference pattern. Results on IWSLT 2025 evaluation data show that our approach achieves competitive and even state-of-the-art performance across tasks.
%U https://aclanthology.org/2026.iwslt-1.19/
%P 171-182
Markdown (Informal)
[BSC’s Submission to the Instruction Following Track of IWSLT 2026](https://aclanthology.org/2026.iwslt-1.19/) (Pareras et al., IWSLT 2026)
ACL
- Oriol Pareras, Joan Llado, Pol Buitrago, Marc Casals-Salvador, Federico Costa, and Cristina Espana-Bonet. 2026. BSC’s Submission to the Instruction Following Track of IWSLT 2026. In Proceedings of the 23rd International Conference on Spoken Language Translation (IWSLT 2026), pages 171–182, San Diego, USA (in-person and online). Association for Computational Linguistics.