@inproceedings{ashida-etal-2020-building,
title = "Building a Part-of-Speech Tagged Corpus for Drenjongke (Bhutia)",
author = "Ashida, Mana and
Lee, Seunghun and
Namgyal, Kunzang",
editor = "Shmueli, Boaz and
Huang, Yin Jou",
booktitle = "Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing: Student Research Workshop",
month = dec,
year = "2020",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.aacl-srw.9",
pages = "57--63",
abstract = "This research paper reports on the generation of the first Drenjongke corpus based on texts taken from a phrase book for beginners, written in the Tibetan script. A corpus of sentences was created after correcting errors in the text scanned through optical character reading (OCR). A total of 34 Part-of-Speech (PoS) tags were defined based on manual annotation performed by the three authors, one of whom is a native speaker of Drenjongke. The first corpus of the Drenjongke language comprises 275 sentences and 1379 tokens, which we plan to expand with other materials to promote further studies of this language.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ashida-etal-2020-building">
<titleInfo>
<title>Building a Part-of-Speech Tagged Corpus for Drenjongke (Bhutia)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mana</namePart>
<namePart type="family">Ashida</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seunghun</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kunzang</namePart>
<namePart type="family">Namgyal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing: Student Research Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Boaz</namePart>
<namePart type="family">Shmueli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yin</namePart>
<namePart type="given">Jou</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This research paper reports on the generation of the first Drenjongke corpus based on texts taken from a phrase book for beginners, written in the Tibetan script. A corpus of sentences was created after correcting errors in the text scanned through optical character reading (OCR). A total of 34 Part-of-Speech (PoS) tags were defined based on manual annotation performed by the three authors, one of whom is a native speaker of Drenjongke. The first corpus of the Drenjongke language comprises 275 sentences and 1379 tokens, which we plan to expand with other materials to promote further studies of this language.</abstract>
<identifier type="citekey">ashida-etal-2020-building</identifier>
<location>
<url>https://aclanthology.org/2020.aacl-srw.9</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>57</start>
<end>63</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Building a Part-of-Speech Tagged Corpus for Drenjongke (Bhutia)
%A Ashida, Mana
%A Lee, Seunghun
%A Namgyal, Kunzang
%Y Shmueli, Boaz
%Y Huang, Yin Jou
%S Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing: Student Research Workshop
%D 2020
%8 December
%I Association for Computational Linguistics
%C Suzhou, China
%F ashida-etal-2020-building
%X This research paper reports on the generation of the first Drenjongke corpus based on texts taken from a phrase book for beginners, written in the Tibetan script. A corpus of sentences was created after correcting errors in the text scanned through optical character reading (OCR). A total of 34 Part-of-Speech (PoS) tags were defined based on manual annotation performed by the three authors, one of whom is a native speaker of Drenjongke. The first corpus of the Drenjongke language comprises 275 sentences and 1379 tokens, which we plan to expand with other materials to promote further studies of this language.
%U https://aclanthology.org/2020.aacl-srw.9
%P 57-63
Markdown (Informal)
[Building a Part-of-Speech Tagged Corpus for Drenjongke (Bhutia)](https://aclanthology.org/2020.aacl-srw.9) (Ashida et al., AACL 2020)
ACL
- Mana Ashida, Seunghun Lee, and Kunzang Namgyal. 2020. Building a Part-of-Speech Tagged Corpus for Drenjongke (Bhutia). In Proceedings of the 1st Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 10th International Joint Conference on Natural Language Processing: Student Research Workshop, pages 57–63, Suzhou, China. Association for Computational Linguistics.