@inproceedings{jobanputra-etal-2024-universal,
title = "A {U}niversal {D}ependencies Treebank for {G}ujarati",
author = {Jobanputra, Mayank and
Mehta, Maitrey and
{\c{C}}{\"o}ltekin, {\c{C}}a{\u{g}}r{\i}},
editor = {Bhatia, Archna and
Bouma, Gosse and
Do{\u{g}}ru{\"o}z, A. Seza and
Evang, Kilian and
Garcia, Marcos and
Giouli, Voula and
Han, Lifeng and
Nivre, Joakim and
Rademaker, Alexandre},
booktitle = "Proceedings of the Joint Workshop on Multiword Expressions and Universal Dependencies (MWE-UD) @ LREC-COLING 2024",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.mwe-1.9",
pages = "56--62",
abstract = "The Universal Dependencies (UD) project has presented itself as a valuable platform to develop various resources for the languages of the world. We present and release a sample treebank for the Indo-Aryan language of Gujarati {--} a widely spoken language with little linguistic resources. This treebank is the first labeled dataset for dependency parsing in the language and the script (the Gujarati script). The treebank contains 187 part-of-speech and dependency annotated sentences from diverse genres. We discuss various idiosyncratic examples, annotation choices and present an elaborate corpus along with agreement statistics. We see this work as a valuable resource and a stepping stone for research in Gujarati Computational Linguistics.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jobanputra-etal-2024-universal">
<titleInfo>
<title>A Universal Dependencies Treebank for Gujarati</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mayank</namePart>
<namePart type="family">Jobanputra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maitrey</namePart>
<namePart type="family">Mehta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Çağrı</namePart>
<namePart type="family">Çöltekin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Joint Workshop on Multiword Expressions and Universal Dependencies (MWE-UD) @ LREC-COLING 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Archna</namePart>
<namePart type="family">Bhatia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gosse</namePart>
<namePart type="family">Bouma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kilian</namePart>
<namePart type="family">Evang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Garcia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Voula</namePart>
<namePart type="family">Giouli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lifeng</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joakim</namePart>
<namePart type="family">Nivre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexandre</namePart>
<namePart type="family">Rademaker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The Universal Dependencies (UD) project has presented itself as a valuable platform to develop various resources for the languages of the world. We present and release a sample treebank for the Indo-Aryan language of Gujarati – a widely spoken language with little linguistic resources. This treebank is the first labeled dataset for dependency parsing in the language and the script (the Gujarati script). The treebank contains 187 part-of-speech and dependency annotated sentences from diverse genres. We discuss various idiosyncratic examples, annotation choices and present an elaborate corpus along with agreement statistics. We see this work as a valuable resource and a stepping stone for research in Gujarati Computational Linguistics.</abstract>
<identifier type="citekey">jobanputra-etal-2024-universal</identifier>
<location>
<url>https://aclanthology.org/2024.mwe-1.9</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>56</start>
<end>62</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Universal Dependencies Treebank for Gujarati
%A Jobanputra, Mayank
%A Mehta, Maitrey
%A Çöltekin, Çağrı
%Y Bhatia, Archna
%Y Bouma, Gosse
%Y Doğruöz, A. Seza
%Y Evang, Kilian
%Y Garcia, Marcos
%Y Giouli, Voula
%Y Han, Lifeng
%Y Nivre, Joakim
%Y Rademaker, Alexandre
%S Proceedings of the Joint Workshop on Multiword Expressions and Universal Dependencies (MWE-UD) @ LREC-COLING 2024
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F jobanputra-etal-2024-universal
%X The Universal Dependencies (UD) project has presented itself as a valuable platform to develop various resources for the languages of the world. We present and release a sample treebank for the Indo-Aryan language of Gujarati – a widely spoken language with little linguistic resources. This treebank is the first labeled dataset for dependency parsing in the language and the script (the Gujarati script). The treebank contains 187 part-of-speech and dependency annotated sentences from diverse genres. We discuss various idiosyncratic examples, annotation choices and present an elaborate corpus along with agreement statistics. We see this work as a valuable resource and a stepping stone for research in Gujarati Computational Linguistics.
%U https://aclanthology.org/2024.mwe-1.9
%P 56-62
Markdown (Informal)
[A Universal Dependencies Treebank for Gujarati](https://aclanthology.org/2024.mwe-1.9) (Jobanputra et al., MWE-UDW-WS 2024)
ACL
- Mayank Jobanputra, Maitrey Mehta, and Çağrı Çöltekin. 2024. A Universal Dependencies Treebank for Gujarati. In Proceedings of the Joint Workshop on Multiword Expressions and Universal Dependencies (MWE-UD) @ LREC-COLING 2024, pages 56–62, Torino, Italia. ELRA and ICCL.