@inproceedings{yang-etal-2025-recontextualizing,
title = {Recontextualizing Revitalization: A Mixed Media Approach to Reviving the N{\"u}shu Language},
author = "Yang, Ivory and
Guo, Xiaobo and
Wang, Yuxin and
Zhang, Hefan and
Jia, Yaning and
Dinauer, William and
Vosoughi, Soroush",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.emnlp-main.627/",
pages = "12430--12439",
ISBN = "979-8-89176-332-6",
abstract = {N{\"u}shu is an endangered language from Jiangyong County, China, and the world{'}s only known writing system created and used exclusively by women. Recent Natural Language Processing (NLP) work has digitized small N{\"u}shu-Chinese corpora, but the script remains computationally inaccessible due to its handwritten, mixed-media form and dearth of multimodal resources. We address this gap with two novel datasets: N{\"u}shuVision, an image corpus of 500 rendered sentences in traditional vertical, right-to-left orthography, and N{\"u}shuStrokes, the first sequential handwriting recordings of all 397 Unicode N{\"u}shu characters by an expert calligrapher. Evaluating five state-of-the-art Chinese Optical Character Recognition (OCR) systems on N{\"u}shuVision shows that all fail entirely, each yielding a Character Error Rate (CER) of 1.0. Fine-tuning Microsoft{'}s TrOCR on N{\"u}shuVision lowers CER to 0.67, a modest yet meaningful improvement. These contributions establish the first multimodal foundation for N{\"u}shu revitalization and offer a culturally grounded framework for language preservation.}
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yang-etal-2025-recontextualizing">
<titleInfo>
<title>Recontextualizing Revitalization: A Mixed Media Approach to Reviving the Nüshu Language</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ivory</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaobo</namePart>
<namePart type="family">Guo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuxin</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hefan</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yaning</namePart>
<namePart type="family">Jia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">William</namePart>
<namePart type="family">Dinauer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Soroush</namePart>
<namePart type="family">Vosoughi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-332-6</identifier>
</relatedItem>
<abstract>Nüshu is an endangered language from Jiangyong County, China, and the world’s only known writing system created and used exclusively by women. Recent Natural Language Processing (NLP) work has digitized small Nüshu-Chinese corpora, but the script remains computationally inaccessible due to its handwritten, mixed-media form and dearth of multimodal resources. We address this gap with two novel datasets: NüshuVision, an image corpus of 500 rendered sentences in traditional vertical, right-to-left orthography, and NüshuStrokes, the first sequential handwriting recordings of all 397 Unicode Nüshu characters by an expert calligrapher. Evaluating five state-of-the-art Chinese Optical Character Recognition (OCR) systems on NüshuVision shows that all fail entirely, each yielding a Character Error Rate (CER) of 1.0. Fine-tuning Microsoft’s TrOCR on NüshuVision lowers CER to 0.67, a modest yet meaningful improvement. These contributions establish the first multimodal foundation for Nüshu revitalization and offer a culturally grounded framework for language preservation.</abstract>
<identifier type="citekey">yang-etal-2025-recontextualizing</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-main.627/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>12430</start>
<end>12439</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Recontextualizing Revitalization: A Mixed Media Approach to Reviving the Nüshu Language
%A Yang, Ivory
%A Guo, Xiaobo
%A Wang, Yuxin
%A Zhang, Hefan
%A Jia, Yaning
%A Dinauer, William
%A Vosoughi, Soroush
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-332-6
%F yang-etal-2025-recontextualizing
%X Nüshu is an endangered language from Jiangyong County, China, and the world’s only known writing system created and used exclusively by women. Recent Natural Language Processing (NLP) work has digitized small Nüshu-Chinese corpora, but the script remains computationally inaccessible due to its handwritten, mixed-media form and dearth of multimodal resources. We address this gap with two novel datasets: NüshuVision, an image corpus of 500 rendered sentences in traditional vertical, right-to-left orthography, and NüshuStrokes, the first sequential handwriting recordings of all 397 Unicode Nüshu characters by an expert calligrapher. Evaluating five state-of-the-art Chinese Optical Character Recognition (OCR) systems on NüshuVision shows that all fail entirely, each yielding a Character Error Rate (CER) of 1.0. Fine-tuning Microsoft’s TrOCR on NüshuVision lowers CER to 0.67, a modest yet meaningful improvement. These contributions establish the first multimodal foundation for Nüshu revitalization and offer a culturally grounded framework for language preservation.
%U https://aclanthology.org/2025.emnlp-main.627/
%P 12430-12439
Markdown (Informal)
[Recontextualizing Revitalization: A Mixed Media Approach to Reviving the Nüshu Language](https://aclanthology.org/2025.emnlp-main.627/) (Yang et al., EMNLP 2025)
ACL