@inproceedings{jain-etal-2018-uniform,
title = "{U}niform {I}nformation {D}ensity Effects on Syntactic Choice in {H}indi",
author = "Jain, Ayush and
Singh, Vishal and
Ranjan, Sidharth and
Rajkumar, Rajakrishnan and
Agarwal, Sumeet",
editor = "Becerra-Bonache, Leonor and
Jim{\'e}nez-L{\'o}pez, M. Dolores and
Mart{\'i}n-Vide, Carlos and
Torrens-Urrutia, Adri{\`a}",
booktitle = "Proceedings of the Workshop on Linguistic Complexity and Natural Language Processing",
month = aug,
year = "2018",
address = "Santa Fe, New-Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-4605/",
pages = "38--48",
abstract = "According to the UNIFORM INFORMATION DENSITY (UID) hypothesis (Levy and Jaeger, 2007; Jaeger, 2010), speakers tend to distribute information density across the signal uniformly while producing language. The prior works cited above studied syntactic reduction in language production at particular choice points in a sentence. In contrast, we use a variant of the above UID hypothesis in order to investigate the extent to which word order choices in Hindi are influenced by the drive to minimize the variance of information across entire sentences. To this end, we propose multiple lexical and syntactic measures (at both word and constituent levels) to capture the uniform spread of information across a sentence. Subsequently, we incorporate these measures in machine learning models aimed to distinguish between a naturally occurring corpus sentence and its grammatical variants (expressing the same idea). Our results indicate that our UID measures are not a significant factor in predicting the corpus sentence in the presence of lexical surprisal, a competing control predictor. Finally, in the light of other recent works, we conclude with a discussion of reasons for UID not being suitable for a theory of word order."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jain-etal-2018-uniform">
<titleInfo>
<title>Uniform Information Density Effects on Syntactic Choice in Hindi</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ayush</namePart>
<namePart type="family">Jain</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vishal</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sidharth</namePart>
<namePart type="family">Ranjan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rajakrishnan</namePart>
<namePart type="family">Rajkumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sumeet</namePart>
<namePart type="family">Agarwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Linguistic Complexity and Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Leonor</namePart>
<namePart type="family">Becerra-Bonache</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">M</namePart>
<namePart type="given">Dolores</namePart>
<namePart type="family">Jiménez-López</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carlos</namePart>
<namePart type="family">Martín-Vide</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adrià</namePart>
<namePart type="family">Torrens-Urrutia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Santa Fe, New-Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>According to the UNIFORM INFORMATION DENSITY (UID) hypothesis (Levy and Jaeger, 2007; Jaeger, 2010), speakers tend to distribute information density across the signal uniformly while producing language. The prior works cited above studied syntactic reduction in language production at particular choice points in a sentence. In contrast, we use a variant of the above UID hypothesis in order to investigate the extent to which word order choices in Hindi are influenced by the drive to minimize the variance of information across entire sentences. To this end, we propose multiple lexical and syntactic measures (at both word and constituent levels) to capture the uniform spread of information across a sentence. Subsequently, we incorporate these measures in machine learning models aimed to distinguish between a naturally occurring corpus sentence and its grammatical variants (expressing the same idea). Our results indicate that our UID measures are not a significant factor in predicting the corpus sentence in the presence of lexical surprisal, a competing control predictor. Finally, in the light of other recent works, we conclude with a discussion of reasons for UID not being suitable for a theory of word order.</abstract>
<identifier type="citekey">jain-etal-2018-uniform</identifier>
<location>
<url>https://aclanthology.org/W18-4605/</url>
</location>
<part>
<date>2018-08</date>
<extent unit="page">
<start>38</start>
<end>48</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Uniform Information Density Effects on Syntactic Choice in Hindi
%A Jain, Ayush
%A Singh, Vishal
%A Ranjan, Sidharth
%A Rajkumar, Rajakrishnan
%A Agarwal, Sumeet
%Y Becerra-Bonache, Leonor
%Y Jiménez-López, M. Dolores
%Y Martín-Vide, Carlos
%Y Torrens-Urrutia, Adrià
%S Proceedings of the Workshop on Linguistic Complexity and Natural Language Processing
%D 2018
%8 August
%I Association for Computational Linguistics
%C Santa Fe, New-Mexico
%F jain-etal-2018-uniform
%X According to the UNIFORM INFORMATION DENSITY (UID) hypothesis (Levy and Jaeger, 2007; Jaeger, 2010), speakers tend to distribute information density across the signal uniformly while producing language. The prior works cited above studied syntactic reduction in language production at particular choice points in a sentence. In contrast, we use a variant of the above UID hypothesis in order to investigate the extent to which word order choices in Hindi are influenced by the drive to minimize the variance of information across entire sentences. To this end, we propose multiple lexical and syntactic measures (at both word and constituent levels) to capture the uniform spread of information across a sentence. Subsequently, we incorporate these measures in machine learning models aimed to distinguish between a naturally occurring corpus sentence and its grammatical variants (expressing the same idea). Our results indicate that our UID measures are not a significant factor in predicting the corpus sentence in the presence of lexical surprisal, a competing control predictor. Finally, in the light of other recent works, we conclude with a discussion of reasons for UID not being suitable for a theory of word order.
%U https://aclanthology.org/W18-4605/
%P 38-48
Markdown (Informal)
[Uniform Information Density Effects on Syntactic Choice in Hindi](https://aclanthology.org/W18-4605/) (Jain et al., 2018)
ACL
- Ayush Jain, Vishal Singh, Sidharth Ranjan, Rajakrishnan Rajkumar, and Sumeet Agarwal. 2018. Uniform Information Density Effects on Syntactic Choice in Hindi. In Proceedings of the Workshop on Linguistic Complexity and Natural Language Processing, pages 38–48, Santa Fe, New-Mexico. Association for Computational Linguistics.