@inproceedings{zhuocheng-etal-2023-scaling,
title = "Scaling Law for Document Neural Machine Translation",
author = "Zhuocheng, Zhang and
Gu, Shuhao and
Zhang, Min and
Feng, Yang",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-emnlp.556",
doi = "10.18653/v1/2023.findings-emnlp.556",
pages = "8290--8303",
abstract = "The scaling laws of language models have played a significant role in advancing large language models. In order to promote the development of document translation, we systematically examine the scaling laws in this field. In this paper, we carry out an in-depth analysis of the influence of three factors on translation quality: model scale, data scale, and sequence length. Our findings reveal that increasing sequence length effectively enhances model performance when model size is limited. However, sequence length cannot be infinitely extended; it must be suitably aligned with the model scale and corpus volume. Further research shows that providing adequate context can effectively enhance the translation quality of a document{'}s initial portion. Nonetheless, exposure bias remains the primary factor hindering further improvement in translation quality for the latter half of the document.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhuocheng-etal-2023-scaling">
<titleInfo>
<title>Scaling Law for Document Neural Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zhang</namePart>
<namePart type="family">Zhuocheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shuhao</namePart>
<namePart type="family">Gu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Feng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The scaling laws of language models have played a significant role in advancing large language models. In order to promote the development of document translation, we systematically examine the scaling laws in this field. In this paper, we carry out an in-depth analysis of the influence of three factors on translation quality: model scale, data scale, and sequence length. Our findings reveal that increasing sequence length effectively enhances model performance when model size is limited. However, sequence length cannot be infinitely extended; it must be suitably aligned with the model scale and corpus volume. Further research shows that providing adequate context can effectively enhance the translation quality of a document’s initial portion. Nonetheless, exposure bias remains the primary factor hindering further improvement in translation quality for the latter half of the document.</abstract>
<identifier type="citekey">zhuocheng-etal-2023-scaling</identifier>
<identifier type="doi">10.18653/v1/2023.findings-emnlp.556</identifier>
<location>
<url>https://aclanthology.org/2023.findings-emnlp.556</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>8290</start>
<end>8303</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Scaling Law for Document Neural Machine Translation
%A Zhuocheng, Zhang
%A Gu, Shuhao
%A Zhang, Min
%A Feng, Yang
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F zhuocheng-etal-2023-scaling
%X The scaling laws of language models have played a significant role in advancing large language models. In order to promote the development of document translation, we systematically examine the scaling laws in this field. In this paper, we carry out an in-depth analysis of the influence of three factors on translation quality: model scale, data scale, and sequence length. Our findings reveal that increasing sequence length effectively enhances model performance when model size is limited. However, sequence length cannot be infinitely extended; it must be suitably aligned with the model scale and corpus volume. Further research shows that providing adequate context can effectively enhance the translation quality of a document’s initial portion. Nonetheless, exposure bias remains the primary factor hindering further improvement in translation quality for the latter half of the document.
%R 10.18653/v1/2023.findings-emnlp.556
%U https://aclanthology.org/2023.findings-emnlp.556
%U https://doi.org/10.18653/v1/2023.findings-emnlp.556
%P 8290-8303
Markdown (Informal)
[Scaling Law for Document Neural Machine Translation](https://aclanthology.org/2023.findings-emnlp.556) (Zhuocheng et al., Findings 2023)
ACL
- Zhang Zhuocheng, Shuhao Gu, Min Zhang, and Yang Feng. 2023. Scaling Law for Document Neural Machine Translation. In Findings of the Association for Computational Linguistics: EMNLP 2023, pages 8290–8303, Singapore. Association for Computational Linguistics.