% ACL Anthology record 2024.iwslt-1.35. Proper names, the sentence-initial word after "?" and the
% IWSLT acronym are brace-protected as whole words so sentence-casing styles keep their capitals.
@inproceedings{awiszus-etal-2024-charles,
    title = "{Charles} {Locock}, {Lowcock} or {Lockhart}? {Offline} Speech Translation: Test Suite for Named Entities",
    author = {Awiszus, Maximilian and
      Niehues, Jan and
      Turchi, Marco and
      St{\"u}ker, Sebastian and
      Waibel, Alex},
    editor = "Salesky, Elizabeth and
      Federico, Marcello and
      Carpuat, Marine",
    booktitle = "Proceedings of the 21st International Conference on Spoken Language Translation ({IWSLT} 2024)",
    month = aug,
    year = "2024",
    address = "Bangkok, Thailand (in-person and online)",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.iwslt-1.35/",
    doi = "10.18653/v1/2024.iwslt-1.35",
    pages = "291--297",
    abstract = "Generating rare words is a challenging task for natural language processing in general and in speech translation (ST) specifically. This paper introduces a test suite prepared for the Offline ST shared task at IWSLT. In the test suite, corresponding rare words (i.e. named entities) were annotated on TED-Talks for English and German and the English side was made available to the participants together with some distractors (irrelevant named entities). Our evaluation checks the capabilities of ST systems to leverage the information in the contextual list of named entities and improve translation quality. Systems are ranked based on the recall and precision of named entities (separately on person, location, and organization names) in the translated texts. Our evaluation shows that using contextual information improves translation quality as well as the recall and precision of NEs. The recall of organization names in all submissions is the lowest of all categories with a maximum of 87.5 {\%} confirming the difficulties of ST systems in dealing with names."
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper with citekey awiszus-etal-2024-charles. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="awiszus-etal-2024-charles">
<titleInfo>
<title>Charles Locock, Lowcock or Lockhart? Offline Speech Translation: Test Suite for Named Entities</title>
</titleInfo>
<!-- Authors of the paper, one name element each, in the order listed. -->
<name type="personal">
<namePart type="given">Maximilian</namePart>
<namePart type="family">Awiszus</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Niehues</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Turchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Stüker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alex</namePart>
<namePart type="family">Waibel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<!-- Issue date of the paper, given as year and month. -->
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Host item: the proceedings volume, with its editors, publisher and place. -->
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 21st International Conference on Spoken Language Translation (IWSLT 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Salesky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcello</namePart>
<namePart type="family">Federico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marine</namePart>
<namePart type="family">Carpuat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand (in-person and online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Generating rare words is a challenging task for natural language processing in general and in speech translation (ST) specifically. This paper introduces a test suite prepared for the Offline ST shared task at IWSLT. In the test suite, corresponding rare words (i.e. named entities) were annotated on TED-Talks for English and German and the English side was made available to the participants together with some distractors (irrelevant named entities). Our evaluation checks the capabilities of ST systems to leverage the information in the contextual list of named entities and improve translation quality. Systems are ranked based on the recall and precision of named entities (separately on person, location, and organization names) in the translated texts. Our evaluation shows that using contextual information improves translation quality as well as the recall and precision of NEs. The recall of organization names in all submissions is the lowest of all categories with a maximum of 87.5 % confirming the difficulties of ST systems in dealing with names.</abstract>
<!-- Identifiers (citation key, DOI) and the landing-page URL. -->
<identifier type="citekey">awiszus-etal-2024-charles</identifier>
<identifier type="doi">10.18653/v1/2024.iwslt-1.35</identifier>
<location>
<url>https://aclanthology.org/2024.iwslt-1.35/</url>
</location>
<!-- Position within the host volume: date and page range. -->
<part>
<date>2024-08</date>
<extent unit="page">
<start>291</start>
<end>297</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Charles Locock, Lowcock or Lockhart? Offline Speech Translation: Test Suite for Named Entities
%A Awiszus, Maximilian
%A Niehues, Jan
%A Turchi, Marco
%A Stüker, Sebastian
%A Waibel, Alex
%Y Salesky, Elizabeth
%Y Federico, Marcello
%Y Carpuat, Marine
%S Proceedings of the 21st International Conference on Spoken Language Translation (IWSLT 2024)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand (in-person and online)
%F awiszus-etal-2024-charles
%X Generating rare words is a challenging task for natural language processing in general and in speech translation (ST) specifically. This paper introduces a test suite prepared for the Offline ST shared task at IWSLT. In the test suite, corresponding rare words (i.e. named entities) were annotated on TED-Talks for English and German and the English side was made available to the participants together with some distractors (irrelevant named entities). Our evaluation checks the capabilities of ST systems to leverage the information in the contextual list of named entities and improve translation quality. Systems are ranked based on the recall and precision of named entities (separately on person, location, and organization names) in the translated texts. Our evaluation shows that using contextual information improves translation quality as well as the recall and precision of NEs. The recall of organization names in all submissions is the lowest of all categories with a maximum of 87.5 % confirming the difficulties of ST systems in dealing with names.
%R 10.18653/v1/2024.iwslt-1.35
%U https://aclanthology.org/2024.iwslt-1.35/
%U https://doi.org/10.18653/v1/2024.iwslt-1.35
%P 291-297
Markdown (Informal)
[Charles Locock, Lowcock or Lockhart? Offline Speech Translation: Test Suite for Named Entities](https://aclanthology.org/2024.iwslt-1.35/) (Awiszus et al., IWSLT 2024)
ACL