@inproceedings{zhou-guojiang-ang-dong-etal-2023-ji,
title = "基于语音文本跨模态表征对齐的端到端语音翻译(End-to-end Speech Translation Based on Cross-modal Representation Alignment of Speech and Text)",
author = "Zhou, Guojiang and
Dong, Ling and
Yu, Zhengtao and
Gao, Shengxiang and
Wang, Wenjun and
Ma, Houli and
周, 国江 and
董, 凌 and
余, 正涛 and
高, 盛祥 and
王, 文君 and
马, 候丽",
editor = "Sun, Maosong and
Qin, Bing and
Qiu, Xipeng and
Jiang, Jing and
Han, Xianpei",
booktitle = "Proceedings of the 22nd Chinese National Conference on Computational Linguistics",
month = aug,
year = "2023",
address = "Harbin, China",
publisher = "Chinese Information Processing Society of China",
url = "https://aclanthology.org/2023.ccl-1.7",
pages = "78--89",
abstract = "{``}端到端语音翻译需要解决源语言语音到目标语言文本的跨语言和跨模态映射,有限标注数据条件下,建立语音文本表征间的统一映射,缓解跨模态差异是提升语音翻译性能的关键。本文提出语音文本跨模态表征对齐方法,对语音文本表征进行多粒度对齐并进行混合作为并行输入,基于多模态表征的一致性约束进行多任务融合训练。在MuST-C数据集上的实验表明,本文所提方法优于现有端到端语音翻译跨模态表征相关方法,有效提升了语音翻译模型跨模态映射能力和翻译性能。{''}",
language = "Chinese",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhou-guojiang-ang-dong-etal-2023-ji">
<titleInfo>
<title>基于语音文本跨模态表征对齐的端到端语音翻译(End-to-end Speech Translation Based on Cross-modal Representation Alignment of Speech and Text)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Guojiang</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ling</namePart>
<namePart type="family">Dong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhengtao</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shengxiang</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wenjun</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Houli</namePart>
<namePart type="family">Ma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">国江</namePart>
<namePart type="family">周</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">凌</namePart>
<namePart type="family">董</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">正涛</namePart>
<namePart type="family">余</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">盛祥</namePart>
<namePart type="family">高</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">文君</namePart>
<namePart type="family">王</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">候丽</namePart>
<namePart type="family">马</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">Chinese</languageTerm>
<languageTerm type="code" authority="iso639-2b">chi</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 22nd Chinese National Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maosong</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bing</namePart>
<namePart type="family">Qin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xipeng</namePart>
<namePart type="family">Qiu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jing</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xianpei</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Chinese Information Processing Society of China</publisher>
<place>
<placeTerm type="text">Harbin, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>“端到端语音翻译需要解决源语言语音到目标语言文本的跨语言和跨模态映射,有限标注数据条件下,建立语音文本表征间的统一映射,缓解跨模态差异是提升语音翻译性能的关键。本文提出语音文本跨模态表征对齐方法,对语音文本表征进行多粒度对齐并进行混合作为并行输入,基于多模态表征的一致性约束进行多任务融合训练。在MuST-C数据集上的实验表明,本文所提方法优于现有端到端语音翻译跨模态表征相关方法,有效提升了语音翻译模型跨模态映射能力和翻译性能。”</abstract>
<identifier type="citekey">zhou-guojiang-ang-dong-etal-2023-ji</identifier>
<location>
<url>https://aclanthology.org/2023.ccl-1.7</url>
</location>
<part>
<date>2023-08</date>
<extent unit="page">
<start>78</start>
<end>89</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T 基于语音文本跨模态表征对齐的端到端语音翻译(End-to-end Speech Translation Based on Cross-modal Representation Alignment of Speech and Text)
%A Zhou, Guojiang
%A Dong, Ling
%A Yu, Zhengtao
%A Gao, Shengxiang
%A Wang, Wenjun
%A Ma, Houli
%A 周, 国江
%A 董, 凌
%A 余, 正涛
%A 高, 盛祥
%A 王, 文君
%A 马, 候丽
%Y Sun, Maosong
%Y Qin, Bing
%Y Qiu, Xipeng
%Y Jiang, Jing
%Y Han, Xianpei
%S Proceedings of the 22nd Chinese National Conference on Computational Linguistics
%D 2023
%8 August
%I Chinese Information Processing Society of China
%C Harbin, China
%G Chinese
%F zhou-guojiang-ang-dong-etal-2023-ji
%X “端到端语音翻译需要解决源语言语音到目标语言文本的跨语言和跨模态映射,有限标注数据条件下,建立语音文本表征间的统一映射,缓解跨模态差异是提升语音翻译性能的关键。本文提出语音文本跨模态表征对齐方法,对语音文本表征进行多粒度对齐并进行混合作为并行输入,基于多模态表征的一致性约束进行多任务融合训练。在MuST-C数据集上的实验表明,本文所提方法优于现有端到端语音翻译跨模态表征相关方法,有效提升了语音翻译模型跨模态映射能力和翻译性能。”
%U https://aclanthology.org/2023.ccl-1.7
%P 78-89
Markdown (Informal)
[基于语音文本跨模态表征对齐的端到端语音翻译(End-to-end Speech Translation Based on Cross-modal Representation Alignment of Speech and Text)](https://aclanthology.org/2023.ccl-1.7) (Zhou et al., CCL 2023)
ACL
- Guojiang Zhou, Ling Dong, Zhengtao Yu, Shengxiang Gao, Wenjun Wang, Houli Ma, 国江 周, 凌 董, 正涛 余, 盛祥 高, 文君 王, and 候丽 马. 2023. 基于语音文本跨模态表征对齐的端到端语音翻译(End-to-end Speech Translation Based on Cross-modal Representation Alignment of Speech and Text). In Proceedings of the 22nd Chinese National Conference on Computational Linguistics, pages 78–89, Harbin, China. Chinese Information Processing Society of China.