% Workshop paper in the WSSANLP 2016 proceedings (ACL Anthology W16-3712).
% Case-sensitive words in title/booktitle are brace-protected as whole words
% so that sentence-casing styles keep their capitals without mid-word braces.
@inproceedings{nishioka-akasegawa-2016-development,
  title     = {The development of a web corpus of {Hindi} language and corpus-based comparative studies to {Japanese}},
  author    = {Nishioka, Miki and Akasegawa, Shiro},
  editor    = {Wu, Dekai and Bhattacharyya, Pushpak},
  booktitle = {Proceedings of the 6th Workshop on South and Southeast {Asian} Natural Language Processing ({WSSANLP2016})},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  url       = {https://aclanthology.org/W16-3712},
  pages     = {114--123},
  abstract  = {In this paper, we discuss our creation of a web corpus of spoken Hindi (COSH), one of the Indo-Aryan languages spoken mainly in the Indian subcontinent. We also point out notable problems we{'}ve encountered in the web corpus and the special concordancer. After observing the kind of technical problems we encountered, especially regarding annotation tagged by Shiva Reddy{'}s tagger, we argue how they can be solved when using COSH for linguistic studies. Finally, we mention the kinds of linguistic research that we non-native speakers of Hindi can do using the corpus, especially in pragmatics and semantics, and from a comparative viewpoint to Japanese.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey nishioka-akasegawa-2016-development -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="nishioka-akasegawa-2016-development">
    <!-- Title of the paper itself -->
    <titleInfo>
      <title>The development of a web corpus of Hindi language and corpus-based comparative studies to Japanese</title>
    </titleInfo>
    <!-- Authors, in listed order -->
    <name type="personal">
      <namePart type="given">Miki</namePart>
      <namePart type="family">Nishioka</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shiro</namePart>
      <namePart type="family">Akasegawa</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2016-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 6th Workshop on South and Southeast Asian Natural Language Processing (WSSANLP2016)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Dekai</namePart>
        <namePart type="family">Wu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Pushpak</namePart>
        <namePart type="family">Bhattacharyya</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>The COLING 2016 Organizing Committee</publisher>
        <place>
          <placeTerm type="text">Osaka, Japan</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this paper, we discuss our creation of a web corpus of spoken Hindi (COSH), one of the Indo-Aryan languages spoken mainly in the Indian subcontinent. We also point out notable problems we’ve encountered in the web corpus and the special concordancer. After observing the kind of technical problems we encountered, especially regarding annotation tagged by Shiva Reddy’s tagger, we argue how they can be solved when using COSH for linguistic studies. Finally, we mention the kinds of linguistic research that we non-native speakers of Hindi can do using the corpus, especially in pragmatics and semantics, and from a comparative viewpoint to Japanese.</abstract>
    <identifier type="citekey">nishioka-akasegawa-2016-development</identifier>
    <location>
      <url>https://aclanthology.org/W16-3712</url>
    </location>
    <!-- Issue date and page extent of the paper -->
    <part>
      <date>2016-12</date>
      <extent unit="page">
        <start>114</start>
        <end>123</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T The development of a web corpus of Hindi language and corpus-based comparative studies to Japanese
%A Nishioka, Miki
%A Akasegawa, Shiro
%Y Wu, Dekai
%Y Bhattacharyya, Pushpak
%S Proceedings of the 6th Workshop on South and Southeast Asian Natural Language Processing (WSSANLP2016)
%D 2016
%8 December
%I The COLING 2016 Organizing Committee
%C Osaka, Japan
%F nishioka-akasegawa-2016-development
%X In this paper, we discuss our creation of a web corpus of spoken Hindi (COSH), one of the Indo-Aryan languages spoken mainly in the Indian subcontinent. We also point out notable problems we’ve encountered in the web corpus and the special concordancer. After observing the kind of technical problems we encountered, especially regarding annotation tagged by Shiva Reddy’s tagger, we argue how they can be solved when using COSH for linguistic studies. Finally, we mention the kinds of linguistic research that we non-native speakers of Hindi can do using the corpus, especially in pragmatics and semantics, and from a comparative viewpoint to Japanese.
%U https://aclanthology.org/W16-3712
%P 114-123
Markdown (Informal)
[The development of a web corpus of Hindi language and corpus-based comparative studies to Japanese](https://aclanthology.org/W16-3712) (Nishioka & Akasegawa, WSSANLP 2016)
ACL