@inproceedings{yang-2025-shud,
title = "{S}h{UD}: the First Shanghainese {U}niversal {D}ependency Treebank",
author = "Yang, Qizhen",
editor = {Bouma, Gosse and
{\c{C}}{\"o}ltekin, {\c{C}}a{\u{g}}r{\i}},
booktitle = "Proceedings of the Eighth Workshop on Universal Dependencies (UDW, SyntaxFest 2025)",
month = aug,
year = "2025",
address = "Ljubljana, Slovenia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.udw-1.20/",
pages = "186--193",
ISBN = "979-8-89176-292-3",
abstract = "This paper introduces ShUD, the first Universal Dependencies (UD) treebank for Shanghainese, a Wu Chinese variant spoken by approximately 14 million people but severely under-resourced in NLP. The treebank is built through a scalable annotation pipeline that exploits grammatical parallels between Shanghainese and Mandarin. Our pipeline also provides a practical strategy for bootstrapping resources for other Chinese dialects. We documented syntactic phenomena unique to Shanghainese within the UD framework and fine-tuned a dependency parser using our annotated treebank, contributing a foundation to both NLP tool development and cross-linguistic syntactic research."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yang-2025-shud">
<titleInfo>
<title>ShUD: the First Shanghainese Universal Dependency Treebank</title>
</titleInfo>
<name type="personal">
<namePart type="given">Qizhen</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth Workshop on Universal Dependencies (UDW, SyntaxFest 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gosse</namePart>
<namePart type="family">Bouma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Çağrı</namePart>
<namePart type="family">Çöltekin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Ljubljana, Slovenia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-292-3</identifier>
</relatedItem>
<abstract>This paper introduces ShUD, the first Universal Dependencies (UD) treebank for Shanghainese, a Wu Chinese variant spoken by approximately 14 million people but severely under-resourced in NLP. The treebank is built through a scalable annotation pipeline that exploits grammatical parallels between Shanghainese and Mandarin. Our pipeline also provides a practical strategy for bootstrapping resources for other Chinese dialects. We documented syntactic phenomena unique to Shanghainese within the UD framework and fine-tuned a dependency parser using our annotated treebank, contributing a foundation to both NLP tool development and cross-linguistic syntactic research.</abstract>
<identifier type="citekey">yang-2025-shud</identifier>
<location>
<url>https://aclanthology.org/2025.udw-1.20/</url>
</location>
<part>
<date>2025-08</date>
<extent unit="page">
<start>186</start>
<end>193</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ShUD: the First Shanghainese Universal Dependency Treebank
%A Yang, Qizhen
%Y Bouma, Gosse
%Y Çöltekin, Çağrı
%S Proceedings of the Eighth Workshop on Universal Dependencies (UDW, SyntaxFest 2025)
%D 2025
%8 August
%I Association for Computational Linguistics
%C Ljubljana, Slovenia
%@ 979-8-89176-292-3
%F yang-2025-shud
%X This paper introduces ShUD, the first Universal Dependencies (UD) treebank for Shanghainese, a Wu Chinese variant spoken by approximately 14 million people but severely under-resourced in NLP. The treebank is built through a scalable annotation pipeline that exploits grammatical parallels between Shanghainese and Mandarin. Our pipeline also provides a practical strategy for bootstrapping resources for other Chinese dialects. We documented syntactic phenomena unique to Shanghainese within the UD framework and fine-tuned a dependency parser using our annotated treebank, contributing a foundation to both NLP tool development and cross-linguistic syntactic research.
%U https://aclanthology.org/2025.udw-1.20/
%P 186-193
Markdown (Informal)
[ShUD: the First Shanghainese Universal Dependency Treebank](https://aclanthology.org/2025.udw-1.20/) (Yang, UDW-SyntaxFest 2025)
ACL