@inproceedings{pal-sharma-2019-dataset,
title = "A Dataset for Semantic Role Labelling of {H}indi-{E}nglish Code-Mixed Tweets",
author = "Pal, Riya and
Sharma, Dipti",
editor = "Friedrich, Annemarie and
Zeyrek, Deniz and
Hoek, Jet",
booktitle = "Proceedings of the 13th Linguistic Annotation Workshop",
month = aug,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-4020",
doi = "10.18653/v1/W19-4020",
pages = "178--188",
abstract = "We present a data set of 1460 Hindi-English code-mixed tweets consisting of 20,949 tokens labelled with Proposition Bank labels marking their semantic roles. We created verb frames for complex predicates present in the corpus and formulated mappings from Paninian dependency labels to Proposition Bank labels. With the help of these mappings and the dependency tree, we propose a baseline rule based system for Semantic Role Labelling of Hindi-English code-mixed data. We obtain an accuracy of 96.74{\%} for Argument Identification and are able to further classify 73.93{\%} of the labels correctly. While there is relevant ongoing research on Semantic Role Labelling and on building tools for code-mixed social media data, this is the first attempt at labelling semantic roles in code-mixed data, to the best of our knowledge.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pal-sharma-2019-dataset">
<titleInfo>
<title>A Dataset for Semantic Role Labelling of Hindi-English Code-Mixed Tweets</title>
</titleInfo>
<name type="personal">
<namePart type="given">Riya</namePart>
<namePart type="family">Pal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dipti</namePart>
<namePart type="family">Sharma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 13th Linguistic Annotation Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Annemarie</namePart>
<namePart type="family">Friedrich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Deniz</namePart>
<namePart type="family">Zeyrek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jet</namePart>
<namePart type="family">Hoek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present a data set of 1460 Hindi-English code-mixed tweets consisting of 20,949 tokens labelled with Proposition Bank labels marking their semantic roles. We created verb frames for complex predicates present in the corpus and formulated mappings from Paninian dependency labels to Proposition Bank labels. With the help of these mappings and the dependency tree, we propose a baseline rule based system for Semantic Role Labelling of Hindi-English code-mixed data. We obtain an accuracy of 96.74% for Argument Identification and are able to further classify 73.93% of the labels correctly. While there is relevant ongoing research on Semantic Role Labelling and on building tools for code-mixed social media data, this is the first attempt at labelling semantic roles in code-mixed data, to the best of our knowledge.</abstract>
<identifier type="citekey">pal-sharma-2019-dataset</identifier>
<identifier type="doi">10.18653/v1/W19-4020</identifier>
<location>
<url>https://aclanthology.org/W19-4020</url>
</location>
<part>
<date>2019-08</date>
<extent unit="page">
<start>178</start>
<end>188</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Dataset for Semantic Role Labelling of Hindi-English Code-Mixed Tweets
%A Pal, Riya
%A Sharma, Dipti
%Y Friedrich, Annemarie
%Y Zeyrek, Deniz
%Y Hoek, Jet
%S Proceedings of the 13th Linguistic Annotation Workshop
%D 2019
%8 August
%I Association for Computational Linguistics
%C Florence, Italy
%F pal-sharma-2019-dataset
%X We present a data set of 1460 Hindi-English code-mixed tweets consisting of 20,949 tokens labelled with Proposition Bank labels marking their semantic roles. We created verb frames for complex predicates present in the corpus and formulated mappings from Paninian dependency labels to Proposition Bank labels. With the help of these mappings and the dependency tree, we propose a baseline rule based system for Semantic Role Labelling of Hindi-English code-mixed data. We obtain an accuracy of 96.74% for Argument Identification and are able to further classify 73.93% of the labels correctly. While there is relevant ongoing research on Semantic Role Labelling and on building tools for code-mixed social media data, this is the first attempt at labelling semantic roles in code-mixed data, to the best of our knowledge.
%R 10.18653/v1/W19-4020
%U https://aclanthology.org/W19-4020
%U https://doi.org/10.18653/v1/W19-4020
%P 178-188
Markdown (Informal)
[A Dataset for Semantic Role Labelling of Hindi-English Code-Mixed Tweets](https://aclanthology.org/W19-4020) (Pal & Sharma, LAW 2019)
ACL