@inproceedings{fischer-etal-2025-swissadt,
title = "{S}wiss{ADT}: An Audio Description Translation System for {S}wiss Languages",
author = "Fischer, Lukas and
Gao, Yingqiang and
Lintner, Alexa and
Rios, Annette and
Ebling, Sarah",
editor = "Chen, Weizhu and
Yang, Yi and
Kachuee, Mohammad and
Fu, Xue-Yong",
booktitle = "Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 3: Industry Track)",
month = apr,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.naacl-industry.31/",
doi = "10.18653/v1/2025.naacl-industry.31",
pages = "370--379",
ISBN = "979-8-89176-194-0",
abstract = "Audio description (AD) is a crucial accessibility service provided to blind persons and persons with visual impairment, designed to convey visual information in acoustic form. Despite recent advancements in multilingual machine translation research, the lack of well-crafted and time-synchronized AD data impedes the development of audio description translation (ADT) systems that address the needs of multilingual countries such as Switzerland. Furthermore, most ADT systems rely on text alone, and it is unclear whether incorporating visual information from video clips improves the quality of ADT outputs.In this work, we introduce SwissADT, an **emerging** ADT system for three main Swiss languages and English, designed for future use by our industry partners. By collecting well-crafted AD data augmented with video clips in German, French, Italian, and English, and leveraging the power of Large Language Models (LLMs), we aim to enhance information accessibility for diverse language populations in Switzerland by automatically translating AD scripts to the desired Swiss language. Our extensive experimental ADT results, composed of both automatic and human evaluations of ADT quality, demonstrate the promising capability of SwissADT for the ADT task. We believe that combining human expertise with the generation power of LLMs can further enhance the performance of ADT systems, ultimately benefiting a larger multilingual target population."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fischer-etal-2025-swissadt">
<titleInfo>
<title>SwissADT: An Audio Description Translation System for Swiss Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lukas</namePart>
<namePart type="family">Fischer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yingqiang</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexa</namePart>
<namePart type="family">Lintner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Annette</namePart>
<namePart type="family">Rios</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sarah</namePart>
<namePart type="family">Ebling</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 3: Industry Track)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Weizhu</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yi</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="family">Kachuee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xue-Yong</namePart>
<namePart type="family">Fu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-194-0</identifier>
</relatedItem>
<abstract>Audio description (AD) is a crucial accessibility service provided to blind persons and persons with visual impairment, designed to convey visual information in acoustic form. Despite recent advancements in multilingual machine translation research, the lack of well-crafted and time-synchronized AD data impedes the development of audio description translation (ADT) systems that address the needs of multilingual countries such as Switzerland. Furthermore, most ADT systems rely on text alone, and it is unclear whether incorporating visual information from video clips improves the quality of ADT outputs.In this work, we introduce SwissADT, an **emerging** ADT system for three main Swiss languages and English, designed for future use by our industry partners. By collecting well-crafted AD data augmented with video clips in German, French, Italian, and English, and leveraging the power of Large Language Models (LLMs), we aim to enhance information accessibility for diverse language populations in Switzerland by automatically translating AD scripts to the desired Swiss language. Our extensive experimental ADT results, composed of both automatic and human evaluations of ADT quality, demonstrate the promising capability of SwissADT for the ADT task. We believe that combining human expertise with the generation power of LLMs can further enhance the performance of ADT systems, ultimately benefiting a larger multilingual target population.</abstract>
<identifier type="citekey">fischer-etal-2025-swissadt</identifier>
<identifier type="doi">10.18653/v1/2025.naacl-industry.31</identifier>
<location>
<url>https://aclanthology.org/2025.naacl-industry.31/</url>
</location>
<part>
<date>2025-04</date>
<extent unit="page">
<start>370</start>
<end>379</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SwissADT: An Audio Description Translation System for Swiss Languages
%A Fischer, Lukas
%A Gao, Yingqiang
%A Lintner, Alexa
%A Rios, Annette
%A Ebling, Sarah
%Y Chen, Weizhu
%Y Yang, Yi
%Y Kachuee, Mohammad
%Y Fu, Xue-Yong
%S Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 3: Industry Track)
%D 2025
%8 April
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-194-0
%F fischer-etal-2025-swissadt
%X Audio description (AD) is a crucial accessibility service provided to blind persons and persons with visual impairment, designed to convey visual information in acoustic form. Despite recent advancements in multilingual machine translation research, the lack of well-crafted and time-synchronized AD data impedes the development of audio description translation (ADT) systems that address the needs of multilingual countries such as Switzerland. Furthermore, most ADT systems rely on text alone, and it is unclear whether incorporating visual information from video clips improves the quality of ADT outputs.In this work, we introduce SwissADT, an **emerging** ADT system for three main Swiss languages and English, designed for future use by our industry partners. By collecting well-crafted AD data augmented with video clips in German, French, Italian, and English, and leveraging the power of Large Language Models (LLMs), we aim to enhance information accessibility for diverse language populations in Switzerland by automatically translating AD scripts to the desired Swiss language. Our extensive experimental ADT results, composed of both automatic and human evaluations of ADT quality, demonstrate the promising capability of SwissADT for the ADT task. We believe that combining human expertise with the generation power of LLMs can further enhance the performance of ADT systems, ultimately benefiting a larger multilingual target population.
%R 10.18653/v1/2025.naacl-industry.31
%U https://aclanthology.org/2025.naacl-industry.31/
%U https://doi.org/10.18653/v1/2025.naacl-industry.31
%P 370-379
Markdown (Informal)
[SwissADT: An Audio Description Translation System for Swiss Languages](https://aclanthology.org/2025.naacl-industry.31/) (Fischer et al., NAACL 2025)
ACL
- Lukas Fischer, Yingqiang Gao, Alexa Lintner, Annette Rios, and Sarah Ebling. 2025. SwissADT: An Audio Description Translation System for Swiss Languages. In Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 3: Industry Track), pages 370–379, Albuquerque, New Mexico. Association for Computational Linguistics.