% Workshop paper record; the url field points at its aclanthology.org page.
% NOTE(review): `address` appears to hold the conference venue rather than
% the publisher's city -- confirm this is intended for the target style.
% Title words that must keep their capitals are brace-protected as whole
% words so sentence-casing styles do not lowercase them.
@inproceedings{bauer-etal-2025-universal,
  title     = {{Universal} {Dependencies} for {Sindhi}},
  author    = {Bauer, John and
               Shah, Sakiina and
               Shaheer, Muhammad and
               Talpur, Mir Afza Ahmed and
               Sanjrani, Zubair and
               Qureshi, Sarwat and
               Pirzada, Shafi and
               Manning, Christopher D. and
               Rahman, Mutee U},
  editor    = {Bouma, Gosse and
               {\c{C}}{\"o}ltekin, {\c{C}}a{\u{g}}r{\i}},
  booktitle = {Proceedings of the Eighth Workshop on Universal Dependencies (UDW, SyntaxFest 2025)},
  month     = aug,
  year      = {2025},
  address   = {Ljubljana, Slovenia},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.udw-1.11/},
  pages     = {105--118},
  isbn      = {979-8-89176-292-3},
  abstract  = {Sindhi is an Indo-Aryan language spoken primarily in Pakistan and India by about 40 million people. Despite this extensive use, it is a low resource language for NLP tasks, with few datasets or pretrained embeddings available. In this work, we explore linguistic challenges for annotating Sindhi in the UD paradigm, such as language-specific analysis of adpositions and verb forms. We use this analysis to present a newly annotated dependency treebank for Universal Dependencies, along with pretrained embeddings and an annotation pipeline specifically for Sindhi annotation.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for the paper "Universal Dependencies for Sindhi". -->
  <mods ID="bauer-etal-2025-universal">
    <titleInfo>
      <title>Universal Dependencies for Sindhi</title>
    </titleInfo>

    <!-- Authors: nine personal names, each carrying the role term "author". -->
    <name type="personal">
      <namePart type="given">John</namePart>
      <namePart type="family">Bauer</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sakiina</namePart>
      <namePart type="family">Shah</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Muhammad</namePart>
      <namePart type="family">Shaheer</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <!-- Multi-part given names are split into one namePart per word. -->
      <namePart type="given">Mir</namePart>
      <namePart type="given">Afza</namePart>
      <namePart type="given">Ahmed</namePart>
      <namePart type="family">Talpur</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zubair</namePart>
      <namePart type="family">Sanjrani</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sarwat</namePart>
      <namePart type="family">Qureshi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shafi</namePart>
      <namePart type="family">Pirzada</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Christopher</namePart>
      <namePart type="given">D</namePart>
      <namePart type="family">Manning</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mutee</namePart>
      <namePart type="given">U</namePart>
      <namePart type="family">Rahman</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2025-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Eighth Workshop on Universal Dependencies (UDW, SyntaxFest 2025)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Gosse</namePart>
        <namePart type="family">Bouma</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Çağrı</namePart>
        <namePart type="family">Çöltekin</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Ljubljana, Slovenia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-292-3</identifier>
    </relatedItem>

    <abstract>Sindhi is an Indo-Aryan language spoken primarily in Pakistan and India by about 40 million people. Despite this extensive use, it is a low resource language for NLP tasks, with few datasets or pretrained embeddings available. In this work, we explore linguistic challenges for annotating Sindhi in the UD paradigm, such as language-specific analysis of adpositions and verb forms. We use this analysis to present a newly annotated dependency treebank for Universal Dependencies, along with pretrained embeddings and an annotation pipeline specifically for Sindhi annotation.</abstract>

    <!-- The citekey identifier repeats the ID attribute on the mods element. -->
    <identifier type="citekey">bauer-etal-2025-universal</identifier>
    <location>
      <url>https://aclanthology.org/2025.udw-1.11/</url>
    </location>

    <!-- Position within the host item: issue date and page extent. -->
    <part>
      <date>2025-08</date>
      <extent unit="page">
        <start>105</start>
        <end>118</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Universal Dependencies for Sindhi
%A Bauer, John
%A Shah, Sakiina
%A Shaheer, Muhammad
%A Talpur, Mir Afza Ahmed
%A Sanjrani, Zubair
%A Qureshi, Sarwat
%A Pirzada, Shafi
%A Manning, Christopher D.
%A Rahman, Mutee U.
%Y Bouma, Gosse
%Y Çöltekin, Çağrı
%S Proceedings of the Eighth Workshop on Universal Dependencies (UDW, SyntaxFest 2025)
%D 2025
%8 August
%I Association for Computational Linguistics
%C Ljubljana, Slovenia
%@ 979-8-89176-292-3
%F bauer-etal-2025-universal
%X Sindhi is an Indo-Aryan language spoken primarily in Pakistan and India by about 40 million people. Despite this extensive use, it is a low resource language for NLP tasks, with few datasets or pretrained embeddings available. In this work, we explore linguistic challenges for annotating Sindhi in the UD paradigm, such as language-specific analysis of adpositions and verb forms. We use this analysis to present a newly annotated dependency treebank for Universal Dependencies, along with pretrained embeddings and an annotation pipeline specifically for Sindhi annotation.
%U https://aclanthology.org/2025.udw-1.11/
%P 105-118
Markdown (Informal)
[Universal Dependencies for Sindhi](https://aclanthology.org/2025.udw-1.11/) (Bauer et al., UDW-SyntaxFest 2025)
ACL
- John Bauer, Sakiina Shah, Muhammad Shaheer, Mir Afza Ahmed Talpur, Zubair Sanjrani, Sarwat Qureshi, Shafi Pirzada, Christopher D. Manning, and Mutee U Rahman. 2025. Universal Dependencies for Sindhi. In Proceedings of the Eighth Workshop on Universal Dependencies (UDW, SyntaxFest 2025), pages 105–118, Ljubljana, Slovenia. Association for Computational Linguistics.