@inproceedings{yan-chen-2025-modeling,
title = "Modeling the Law of Abbreviation in Classical, Modern, and {C}hat{GPT}-Generated {C}hinese: A Power-Law Analysis of Structural Economy",
author = "Yan, Jianwei and
Chen, Heng",
editor = "Chen, Xinying and
Wang, Yaqin",
booktitle = "Proceedings of the Third Workshop on Quantitative Syntax (QUASY, SyntaxFest 2025)",
month = aug,
year = "2025",
address = "Ljubljana, Slovenia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.quasy-1.8/",
pages = "56--62",
ISBN = "979-8-89176-293-0",
abstract = "This study investigates the Law of Abbreviation{---}the inverse relationship between word length and frequency{---}across Classical, Modern, and ChatGPT-generated Chinese. Using a tri-partite parallel corpus and a power-law model y = a*x{\textasciicircum}(-b), we analyze the relationship between word length and the average usage frequency of words within a given word length category to assess structural economy. Results confirm a consistent Zipfian distribution across all text types, with high R{\textasciicircum}2 values indicating strong model fit. However, the parameter b varies significantly: Classical Chinese shows the steepest decline, suggesting strong pressure for brevity; Modern Chinese exhibits a moderated pattern; ChatGPT-generated texts display the weakest pressure, prioritizing fluency over compression. These differences reflect evolving communicative priorities and reveal that while AI models can mimic statistical distributions, they underrepresent deeper structural pressures found in natural language evolution. This study offers new insights into lexical optimization, and the parameter b provides a useful metric for comparing structural efficiency across modalities. Implications are discussed in relation to language modeling, cognitive economy, and the evolution of linguistic structure."
}
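The abstract fits the power-law model y = a*x^(-b) to word length versus the average usage frequency per length category, and reports R^2 as goodness of fit. Below is a minimal sketch of such a fit using scipy; the word-length and frequency values are hypothetical, illustrative numbers, not the paper's corpus data, and the fitting choices are an assumption rather than the authors' actual pipeline.

# Sketch of the power-law fit y = a * x^(-b) described in the abstract,
# applied to hypothetical word-length / mean-frequency data.
import numpy as np
from scipy.optimize import curve_fit

def power_law(x, a, b):
    # Model: average usage frequency as a function of word length.
    return a * np.power(x, -b)

# Hypothetical data: word lengths (in characters) and the average usage
# frequency of words in each length category (illustrative values only).
lengths = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
mean_freq = np.array([1200.0, 310.0, 95.0, 40.0, 21.0])

# Least-squares fit of the two parameters a and b.
(a, b), _ = curve_fit(power_law, lengths, mean_freq, p0=(1000.0, 1.0))

# R^2 as a goodness-of-fit measure, as reported in the paper.
pred = power_law(lengths, a, b)
ss_res = np.sum((mean_freq - pred) ** 2)
ss_tot = np.sum((mean_freq - mean_freq.mean()) ** 2)
r2 = 1.0 - ss_res / ss_tot

print(f"a = {a:.2f}, b = {b:.3f}, R^2 = {r2:.4f}")

On this reading, a steeper fitted b (as the paper reports for Classical Chinese) means average frequency falls off more sharply with word length, i.e. stronger pressure toward brevity.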