@inproceedings{bhat-etal-2017-leveraging,
title = "Leveraging Newswire Treebanks for Parsing Conversational Data with Argument Scrambling",
author = "Bhat, Riyaz A. and
Bhat, Irshad and
Sharma, Dipti",
editor = "Miyao, Yusuke and
Sagae, Kenji",
booktitle = "Proceedings of the 15th International Conference on Parsing Technologies",
month = sep,
year = "2017",
address = "Pisa, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-6309/",
pages = "61--66",
abstract = "We investigate the problem of parsing conversational data of morphologically-rich languages such as Hindi where argument scrambling occurs frequently. We evaluate a state-of-the-art non-linear transition-based parsing system on a new dataset containing 506 dependency trees for sentences from Bollywood (Hindi) movie scripts and Twitter posts of Hindi monolingual speakers. We show that a dependency parser trained on a newswire treebank is strongly biased towards the canonical structures and degrades when applied to conversational data. Inspired by Transformational Generative Grammar (Chomsky, 1965), we mitigate the sampling bias by generating all theoretically possible alternative word orders of a clause from the existing (kernel) structures in the treebank. Training our parser on canonical and transformed structures improves performance on conversational data by around 9{\%} LAS over the baseline newswire parser."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bhat-etal-2017-leveraging">
<titleInfo>
<title>Leveraging Newswire Treebanks for Parsing Conversational Data with Argument Scrambling</title>
</titleInfo>
<name type="personal">
<namePart type="given">Riyaz</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Bhat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Irshad</namePart>
<namePart type="family">Bhat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dipti</namePart>
<namePart type="family">Sharma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th International Conference on Parsing Technologies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yusuke</namePart>
<namePart type="family">Miyao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kenji</namePart>
<namePart type="family">Sagae</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Pisa, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We investigate the problem of parsing conversational data of morphologically-rich languages such as Hindi where argument scrambling occurs frequently. We evaluate a state-of-the-art non-linear transition-based parsing system on a new dataset containing 506 dependency trees for sentences from Bollywood (Hindi) movie scripts and Twitter posts of Hindi monolingual speakers. We show that a dependency parser trained on a newswire treebank is strongly biased towards the canonical structures and degrades when applied to conversational data. Inspired by Transformational Generative Grammar (Chomsky, 1965), we mitigate the sampling bias by generating all theoretically possible alternative word orders of a clause from the existing (kernel) structures in the treebank. Training our parser on canonical and transformed structures improves performance on conversational data by around 9% LAS over the baseline newswire parser.</abstract>
<identifier type="citekey">bhat-etal-2017-leveraging</identifier>
<location>
<url>https://aclanthology.org/W17-6309/</url>
</location>
<part>
<date>2017-09</date>
<extent unit="page">
<start>61</start>
<end>66</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Leveraging Newswire Treebanks for Parsing Conversational Data with Argument Scrambling
%A Bhat, Riyaz A.
%A Bhat, Irshad
%A Sharma, Dipti
%Y Miyao, Yusuke
%Y Sagae, Kenji
%S Proceedings of the 15th International Conference on Parsing Technologies
%D 2017
%8 September
%I Association for Computational Linguistics
%C Pisa, Italy
%F bhat-etal-2017-leveraging
%X We investigate the problem of parsing conversational data of morphologically-rich languages such as Hindi where argument scrambling occurs frequently. We evaluate a state-of-the-art non-linear transition-based parsing system on a new dataset containing 506 dependency trees for sentences from Bollywood (Hindi) movie scripts and Twitter posts of Hindi monolingual speakers. We show that a dependency parser trained on a newswire treebank is strongly biased towards the canonical structures and degrades when applied to conversational data. Inspired by Transformational Generative Grammar (Chomsky, 1965), we mitigate the sampling bias by generating all theoretically possible alternative word orders of a clause from the existing (kernel) structures in the treebank. Training our parser on canonical and transformed structures improves performance on conversational data by around 9% LAS over the baseline newswire parser.
%U https://aclanthology.org/W17-6309/
%P 61-66
Markdown (Informal)
[Leveraging Newswire Treebanks for Parsing Conversational Data with Argument Scrambling](https://aclanthology.org/W17-6309/) (Bhat et al., IWPT 2017)
ACL