@inproceedings{shi-etal-2023-ji,
title = "基于词频效应控制的神经机器翻译用词多样性增强方法(Improving Word-level Diversity in Neural Machine Translation by Controlling the Effects of Word Frequency)",
author = "Shi, Xuewen and
Jian, Ping and
Tang, Yikun and
Huang, Heyan",
editor = "Sun, Maosong and
Qin, Bing and
Qiu, Xipeng and
Jiang, Jing and
Han, Xianpei",
booktitle = "Proceedings of the 22nd Chinese National Conference on Computational Linguistics",
month = aug,
year = "2023",
address = "Harbin, China",
publisher = "Chinese Information Processing Society of China",
url = "https://aclanthology.org/2023.ccl-1.6",
pages = "64--77",
abstract = "{``}通过最大似然估计优化的神经机器翻译(NMT)容易出现不可最大化的标记或低频词精度差等问题,这会导致生成的翻译缺乏词级别的多样性。词频在训练数据上的不均衡分布是造成上述现象的原因之一。本文旨在通过限制词频对 NMT 解码时估计概率的影响来缓解上述问题。具体地,我们采用了基于因果推断理论的半同胞回归去噪框架,并结合本文提出的自适应去噪系数来控制词频对模型估计概率的影响,以获得更准确的模型估计概率,并丰富 NMT 译文用词的多样性。本文的实验在四个代表不同资源规模的翻译任务上进行,分别是维吾尔语-汉语、汉语-英语、英语-德语和英语-法语。实验结果表明,本文所提出的方法在提升 NMT 译文词级别多样性的同时,不会损害译文的质量。另外,本文提出的方法还具有模型无关、可解释性强等优点。{''}",
language = "Chinese",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="shi-etal-2023-ji">
<titleInfo>
<title>基于词频效应控制的神经机器翻译用词多样性增强方法(Improving Word-level Diversity in Neural Machine Translation by Controlling the Effects of Word Frequency)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xuewen</namePart>
<namePart type="family">Shi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ping</namePart>
<namePart type="family">Jian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yikun</namePart>
<namePart type="family">Tang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heyan</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">Chinese</languageTerm>
<languageTerm type="code" authority="iso639-2b">chi</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 22nd Chinese National Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maosong</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bing</namePart>
<namePart type="family">Qin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xipeng</namePart>
<namePart type="family">Qiu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jing</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xianpei</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Chinese Information Processing Society of China</publisher>
<place>
<placeTerm type="text">Harbin, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>“通过最大似然估计优化的神经机器翻译(NMT)容易出现不可最大化的标记或低频词精度差等问题,这会导致生成的翻译缺乏词级别的多样性。词频在训练数据上的不均衡分布是造成上述现象的原因之一。本文旨在通过限制词频对 NMT 解码时估计概率的影响来缓解上述问题。具体地,我们采用了基于因果推断理论的半同胞回归去噪框架,并结合本文提出的自适应去噪系数来控制词频对模型估计概率的影响,以获得更准确的模型估计概率,并丰富 NMT 译文用词的多样性。本文的实验在四个代表不同资源规模的翻译任务上进行,分别是维吾尔语-汉语、汉语-英语、英语-德语和英语-法语。实验结果表明,本文所提出的方法在提升 NMT 译文词级别多样性的同时,不会损害译文的质量。另外,本文提出的方法还具有模型无关、可解释性强等优点。”</abstract>
<identifier type="citekey">shi-etal-2023-ji</identifier>
<location>
<url>https://aclanthology.org/2023.ccl-1.6</url>
</location>
<part>
<date>2023-08</date>
<extent unit="page">
<start>64</start>
<end>77</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T 基于词频效应控制的神经机器翻译用词多样性增强方法(Improving Word-level Diversity in Neural Machine Translation by Controlling the Effects of Word Frequency)
%A Shi, Xuewen
%A Jian, Ping
%A Tang, Yikun
%A Huang, Heyan
%Y Sun, Maosong
%Y Qin, Bing
%Y Qiu, Xipeng
%Y Jiang, Jing
%Y Han, Xianpei
%S Proceedings of the 22nd Chinese National Conference on Computational Linguistics
%D 2023
%8 August
%I Chinese Information Processing Society of China
%C Harbin, China
%G Chinese
%F shi-etal-2023-ji
%X “通过最大似然估计优化的神经机器翻译(NMT)容易出现不可最大化的标记或低频词精度差等问题,这会导致生成的翻译缺乏词级别的多样性。词频在训练数据上的不均衡分布是造成上述现象的原因之一。本文旨在通过限制词频对 NMT 解码时估计概率的影响来缓解上述问题。具体地,我们采用了基于因果推断理论的半同胞回归去噪框架,并结合本文提出的自适应去噪系数来控制词频对模型估计概率的影响,以获得更准确的模型估计概率,并丰富 NMT 译文用词的多样性。本文的实验在四个代表不同资源规模的翻译任务上进行,分别是维吾尔语-汉语、汉语-英语、英语-德语和英语-法语。实验结果表明,本文所提出的方法在提升 NMT 译文词级别多样性的同时,不会损害译文的质量。另外,本文提出的方法还具有模型无关、可解释性强等优点。”
%U https://aclanthology.org/2023.ccl-1.6
%P 64-77
Markdown (Informal)
[基于词频效应控制的神经机器翻译用词多样性增强方法(Improving Word-level Diversity in Neural Machine Translation by Controlling the Effects of Word Frequency)](https://aclanthology.org/2023.ccl-1.6) (Shi et al., CCL 2023)
ACL