% Conference paper in the PSLT 2025 workshop proceedings (pages 1--12).
% Title words that must keep their capitalisation are brace-protected as whole
% words so sentence-casing styles do not lowercase them (e.g. the "V" in V3).
@inproceedings{zou-etal-2025-genaiese,
  title = "{GenAIese} - A Comprehensive Comparison of {GPT-4o} and {DeepSeek-V3} for {English}-to-{Chinese} Academic Translation",
  author = "Zou, Longhui and
    Li, Ke and
    Lamerton, Joshua and
    Mirzapour, Mehdi",
  editor = "Tsunakawa, Takashi and
    Sudoh, Katsuhito and
    Goto, Isao",
  booktitle = "Proceedings of the Eleventh Workshop on Patent and Scientific Literature Translation (PSLT 2025)",
  month = jun,
  year = "2025",
  address = "Geneva, Switzerland",
  publisher = "European Association for Machine Translation",
  url = "https://aclanthology.org/2025.pslt-1.1/",
  pages = "1--12",
  isbn = "978-2-9701897-2-5",
  abstract = "This study investigates the translation performance of two large language models{--}ChatGPT-4o and DeepSeek-V3{--}in translating English academic papers on language, culture, and literature into Chinese at the discourse level. Using a corpus of 11 academic texts totaling 3,498 sentences, we evaluated translation quality through automatic metrics (COMET-KIWI), lexical diversity indicators, and syntactic complexity measures. Our findings reveal an interesting contrast: while DeepSeek-V3 achieves higher overall quality scores, GPT-4o produces translations with consistently greater lexical richness (higher type-token ratio, standardized TTR, average sentence length, and word entropy) and syntactic complexity across all five measured metrics, such as Incomplete Dependency Theory Metric (IDT), Dependency Locality Theory Metric (DLT), Combined IDT+DLT Metric (IDT+DLT), Left-Embeddedness (LE), and Nested Nouns Distance (NND). Particularly notable are GPT-4o{'}s higher scores in Left-Embeddedness and Nested Nouns Distance metrics, which are specifically relevant to Chinese linguistic patterns. The divergence between automatic quality estimation and linguistic complexity metrics highlights the multifaceted nature of translation quality assessment."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zou-etal-2025-genaiese">
<titleInfo>
<title>GenAIese - A Comprehensive Comparison of GPT-4o and DeepSeek-V3 for English-to-Chinese Academic Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Longhui</namePart>
<namePart type="family">Zou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ke</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joshua</namePart>
<namePart type="family">Lamerton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mehdi</namePart>
<namePart type="family">Mirzapour</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eleventh Workshop on Patent and Scientific Literature Translation (PSLT 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Takashi</namePart>
<namePart type="family">Tsunakawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katsuhito</namePart>
<namePart type="family">Sudoh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isao</namePart>
<namePart type="family">Goto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Association for Machine Translation</publisher>
<place>
<placeTerm type="text">Geneva, Switzerland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">978-2-9701897-2-5</identifier>
</relatedItem>
<abstract>This study investigates the translation performance of two large language models–ChatGPT-4o and DeepSeek-V3–in translating English academic papers on language, culture, and literature into Chinese at the discourse level. Using a corpus of 11 academic texts totaling 3,498 sentences, we evaluated translation quality through automatic metrics (COMET-KIWI), lexical diversity indicators, and syntactic complexity measures. Our findings reveal an interesting contrast: while DeepSeek-V3 achieves higher overall quality scores, GPT-4o produces translations with consistently greater lexical richness (higher type-token ratio, standardized TTR, average sentence length, and word entropy) and syntactic complexity across all five measured metrics, such as Incomplete Dependency Theory Metric (IDT), Dependency Locality Theory Metric (DLT), Combined IDT+DLT Metric (IDT+DLT), Left-Embeddedness (LE), and Nested Nouns Distance (NND). Particularly notable are GPT-4o’s higher scores in Left-Embeddedness and Nested Nouns Distance metrics, which are specifically relevant to Chinese linguistic patterns. The divergence between automatic quality estimation and linguistic complexity metrics highlights the multifaceted nature of translation quality assessment.</abstract>
<identifier type="citekey">zou-etal-2025-genaiese</identifier>
<location>
<url>https://aclanthology.org/2025.pslt-1.1/</url>
</location>
<part>
<date>2025-06</date>
<extent unit="page">
<start>1</start>
<end>12</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T GenAIese - A Comprehensive Comparison of GPT-4o and DeepSeek-V3 for English-to-Chinese Academic Translation
%A Zou, Longhui
%A Li, Ke
%A Lamerton, Joshua
%A Mirzapour, Mehdi
%Y Tsunakawa, Takashi
%Y Sudoh, Katsuhito
%Y Goto, Isao
%S Proceedings of the Eleventh Workshop on Patent and Scientific Literature Translation (PSLT 2025)
%D 2025
%8 June
%I European Association for Machine Translation
%C Geneva, Switzerland
%@ 978-2-9701897-2-5
%F zou-etal-2025-genaiese
%X This study investigates the translation performance of two large language models–ChatGPT-4o and DeepSeek-V3–in translating English academic papers on language, culture, and literature into Chinese at the discourse level. Using a corpus of 11 academic texts totaling 3,498 sentences, we evaluated translation quality through automatic metrics (COMET-KIWI), lexical diversity indicators, and syntactic complexity measures. Our findings reveal an interesting contrast: while DeepSeek-V3 achieves higher overall quality scores, GPT-4o produces translations with consistently greater lexical richness (higher type-token ratio, standardized TTR, average sentence length, and word entropy) and syntactic complexity across all five measured metrics, such as Incomplete Dependency Theory Metric (IDT), Dependency Locality Theory Metric (DLT), Combined IDT+DLT Metric (IDT+DLT), Left-Embeddedness (LE), and Nested Nouns Distance (NND). Particularly notable are GPT-4o’s higher scores in Left-Embeddedness and Nested Nouns Distance metrics, which are specifically relevant to Chinese linguistic patterns. The divergence between automatic quality estimation and linguistic complexity metrics highlights the multifaceted nature of translation quality assessment.
%U https://aclanthology.org/2025.pslt-1.1/
%P 1-12
Markdown (Informal)
[GenAIese - A Comprehensive Comparison of GPT-4o and DeepSeek-V3 for English-to-Chinese Academic Translation](https://aclanthology.org/2025.pslt-1.1/) (Zou et al., PSLT 2025)
ACL