@inproceedings{xie-etal-2024-hw,
title = "{HW}-{TSC}{'}s submission to the {IWSLT} 2024 Subtitling track",
author = "Xie, Yuhao and
Luo, Yuanchang and
Li, Zongyao and
Wu, Zhanglin and
Chen, Xiaoyu and
Rao, Zhiqiang and
Li, Shaojun and
Shang, Hengchao and
Guo, Jiaxin and
Wei, Daimeng and
Yang, Hao",
editor = "Salesky, Elizabeth and
Federico, Marcello and
Carpuat, Marine",
booktitle = "Proceedings of the 21st International Conference on Spoken Language Translation (IWSLT 2024)",
month = aug,
year = "2024",
address = "Bangkok, Thailand (in-person and online)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.iwslt-1.34",
pages = "286--290",
abstract = "This paper introduces HW-TSC{'}s submission to the IWSLT 2024 Subtitling track. For the automatic subtitling track, we use an unconstrained cascaded strategy, with the main steps being: ASR with word-level timestamps, sentence segmentation based on punctuation restoration, further alignment using CTC or using machine translation with length penalty. For the subtitle compression track, we employ a subtitle compression strategy that integrates machine translation models and extensive rewriting models. We acquire the subtitle text requiring revision through the CPS index, then utilize a translation model to obtain the English version of this text. Following this, we extract the compressed-length subtitle text through controlled decoding. If this method fails to compress the text successfully, we resort to the Llama2 few-shot model for further compression.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="xie-etal-2024-hw">
<titleInfo>
<title>HW-TSC’s submission to the IWSLT 2024 Subtitling track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yuhao</namePart>
<namePart type="family">Xie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuanchang</namePart>
<namePart type="family">Luo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zongyao</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhanglin</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaoyu</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhiqiang</namePart>
<namePart type="family">Rao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shaojun</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hengchao</namePart>
<namePart type="family">Shang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiaxin</namePart>
<namePart type="family">Guo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daimeng</namePart>
<namePart type="family">Wei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hao</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 21st International Conference on Spoken Language Translation (IWSLT 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Salesky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcello</namePart>
<namePart type="family">Federico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marine</namePart>
<namePart type="family">Carpuat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand (in-person and online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper introduces HW-TSC’s submission to the IWSLT 2024 Subtitling track. For the automatic subtitling track, we use an unconstrained cascaded strategy, with the main steps being: ASR with word-level timestamps, sentence segmentation based on punctuation restoration, further alignment using CTC or using machine translation with length penalty. For the subtitle compression track, we employ a subtitle compression strategy that integrates machine translation models and extensive rewriting models. We acquire the subtitle text requiring revision through the CPS index, then utilize a translation model to obtain the English version of this text. Following this, we extract the compressed-length subtitle text through controlled decoding. If this method fails to compress the text successfully, we resort to the Llama2 few-shot model for further compression.</abstract>
<identifier type="citekey">xie-etal-2024-hw</identifier>
<location>
<url>https://aclanthology.org/2024.iwslt-1.34</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>286</start>
<end>290</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T HW-TSC’s submission to the IWSLT 2024 Subtitling track
%A Xie, Yuhao
%A Luo, Yuanchang
%A Li, Zongyao
%A Wu, Zhanglin
%A Chen, Xiaoyu
%A Rao, Zhiqiang
%A Li, Shaojun
%A Shang, Hengchao
%A Guo, Jiaxin
%A Wei, Daimeng
%A Yang, Hao
%Y Salesky, Elizabeth
%Y Federico, Marcello
%Y Carpuat, Marine
%S Proceedings of the 21st International Conference on Spoken Language Translation (IWSLT 2024)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand (in-person and online)
%F xie-etal-2024-hw
%X This paper introduces HW-TSC’s submission to the IWSLT 2024 Subtitling track. For the automatic subtitling track, we use an unconstrained cascaded strategy, with the main steps being: ASR with word-level timestamps, sentence segmentation based on punctuation restoration, further alignment using CTC or using machine translation with length penalty. For the subtitle compression track, we employ a subtitle compression strategy that integrates machine translation models and extensive rewriting models. We acquire the subtitle text requiring revision through the CPS index, then utilize a translation model to obtain the English version of this text. Following this, we extract the compressed-length subtitle text through controlled decoding. If this method fails to compress the text successfully, we resort to the Llama2 few-shot model for further compression.
%U https://aclanthology.org/2024.iwslt-1.34
%P 286-290
Markdown (Informal)
[HW-TSC’s submission to the IWSLT 2024 Subtitling track](https://aclanthology.org/2024.iwslt-1.34) (Xie et al., IWSLT 2024)
ACL
- Yuhao Xie, Yuanchang Luo, Zongyao Li, Zhanglin Wu, Xiaoyu Chen, Zhiqiang Rao, Shaojun Li, Hengchao Shang, Jiaxin Guo, Daimeng Wei, and Hao Yang. 2024. HW-TSC’s submission to the IWSLT 2024 Subtitling track. In Proceedings of the 21st International Conference on Spoken Language Translation (IWSLT 2024), pages 286–290, Bangkok, Thailand (in-person and online). Association for Computational Linguistics.