% Conference paper by Maciej Kurzynski in the NLP4DH / IWCLUL 2023 joint proceedings.
% Proper nouns in the title are brace-protected so that sentence-casing
% bibliography styles do not lowercase them.
@inproceedings{kurzynski-2023-stylometry,
    title     = {The Stylometry of {Maoism}: Quantifying the Language of {Mao Zedong}},
    author    = {Kurzynski, Maciej},
    editor    = {H{\"a}m{\"a}l{\"a}inen, Mika and
                 {\"O}hman, Emily and
                 Pirinen, Flammie and
                 Alnajjar, Khalid and
                 Miyagawa, So and
                 Bizzoni, Yuri and
                 Partanen, Niko and
                 Rueter, Jack},
    booktitle = {Proceedings of the Joint 3rd International Conference on Natural Language Processing for Digital Humanities and 8th International Workshop on Computational Linguistics for Uralic Languages},
    month     = dec,
    year      = {2023},
    address   = {Tokyo, Japan},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2023.nlp4dh-1.9},
    pages     = {76--81},
    abstract  = {Recent advances in computational stylometry have enabled scholars to detect authorial signals with a high degree of precision, but the focus on accuracy comes at the expense of explainability: powerful black-box models are often of little use to traditional humanistic disciplines. With this in mind, we have conducted stylometric experiments on Maospeak, a language style shaped by the writings and speeches of Mao Zedong. We measure per-token perplexity across different GPT models, compute Kullback{--}Leibler divergences between local and global vocabulary distributions, and train a TF-IDF classifier to examine how the modern Chinese language has been transformed to convey the tenets of Maoist doctrine. We offer a computational interpretation of ideology as reduction in perplexity and increase in systematicity of language use.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record for the paper. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="kurzynski-2023-stylometry">
    <!-- The paper itself: title, author, issue date. -->
    <titleInfo>
      <title>The Stylometry of Maoism: Quantifying the Language of Mao Zedong</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Maciej</namePart>
      <namePart type="family">Kurzynski</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Joint 3rd International Conference on Natural Language Processing for Digital Humanities and 8th International Workshop on Computational Linguistics for Uralic Languages</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Mika</namePart>
        <namePart type="family">Hämäläinen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Emily</namePart>
        <namePart type="family">Öhman</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Flammie</namePart>
        <namePart type="family">Pirinen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Khalid</namePart>
        <namePart type="family">Alnajjar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">So</namePart>
        <namePart type="family">Miyagawa</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yuri</namePart>
        <namePart type="family">Bizzoni</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Niko</namePart>
        <namePart type="family">Partanen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jack</namePart>
        <namePart type="family">Rueter</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Tokyo, Japan</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Recent advances in computational stylometry have enabled scholars to detect authorial signals with a high degree of precision, but the focus on accuracy comes at the expense of explainability: powerful black-box models are often of little use to traditional humanistic disciplines. With this in mind, we have conducted stylometric experiments on Maospeak, a language style shaped by the writings and speeches of Mao Zedong. We measure per-token perplexity across different GPT models, compute Kullback–Leibler divergences between local and global vocabulary distributions, and train a TF-IDF classifier to examine how the modern Chinese language has been transformed to convey the tenets of Maoist doctrine. We offer a computational interpretation of ideology as reduction in perplexity and increase in systematicity of language use.</abstract>
    <!-- Identifiers and location of the paper. -->
    <identifier type="citekey">kurzynski-2023-stylometry</identifier>
    <location>
      <url>https://aclanthology.org/2023.nlp4dh-1.9</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2023-12</date>
      <extent unit="page">
        <start>76</start>
        <end>81</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T The Stylometry of Maoism: Quantifying the Language of Mao Zedong
%A Kurzynski, Maciej
%Y Hämäläinen, Mika
%Y Öhman, Emily
%Y Pirinen, Flammie
%Y Alnajjar, Khalid
%Y Miyagawa, So
%Y Bizzoni, Yuri
%Y Partanen, Niko
%Y Rueter, Jack
%S Proceedings of the Joint 3rd International Conference on Natural Language Processing for Digital Humanities and 8th International Workshop on Computational Linguistics for Uralic Languages
%D 2023
%8 December
%I Association for Computational Linguistics
%C Tokyo, Japan
%F kurzynski-2023-stylometry
%X Recent advances in computational stylometry have enabled scholars to detect authorial signals with a high degree of precision, but the focus on accuracy comes at the expense of explainability: powerful black-box models are often of little use to traditional humanistic disciplines. With this in mind, we have conducted stylometric experiments on Maospeak, a language style shaped by the writings and speeches of Mao Zedong. We measure per-token perplexity across different GPT models, compute Kullback–Leibler divergences between local and global vocabulary distributions, and train a TF-IDF classifier to examine how the modern Chinese language has been transformed to convey the tenets of Maoist doctrine. We offer a computational interpretation of ideology as reduction in perplexity and increase in systematicity of language use.
%U https://aclanthology.org/2023.nlp4dh-1.9
%P 76-81
Markdown (Informal)
[The Stylometry of Maoism: Quantifying the Language of Mao Zedong](https://aclanthology.org/2023.nlp4dh-1.9) (Kurzynski, NLP4DH-IWCLUL 2023)
ACL
- Maciej Kurzynski. 2023. The Stylometry of Maoism: Quantifying the Language of Mao Zedong. In Proceedings of the Joint 3rd International Conference on Natural Language Processing for Digital Humanities and 8th International Workshop on Computational Linguistics for Uralic Languages, pages 76–81, Tokyo, Japan. Association for Computational Linguistics.