@inproceedings{patro-etal-2017-english,
title = "All that is {E}nglish may be {H}indi: Enhancing language identification through automatic ranking of the likeliness of word borrowing in social media",
author = "Patro, Jasabanta and
Samanta, Bidisha and
Singh, Saurabh and
Basu, Abhipsa and
Mukherjee, Prithwish and
Choudhury, Monojit and
Mukherjee, Animesh",
editor = "Palmer, Martha and
Hwa, Rebecca and
Riedel, Sebastian",
booktitle = "Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing",
month = sep,
year = "2017",
address = "Copenhagen, Denmark",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D17-1240",
doi = "10.18653/v1/D17-1240",
pages = "2264--2274",
abstract = "n this paper, we present a set of computational methods to identify the likeliness of a word being borrowed, based on the signals from social media. In terms of Spearman{'}s correlation values, our methods perform more than two times better (∼ 0.62) in predicting the borrowing likeliness compared to the best performing baseline (∼ 0.26) reported in literature. Based on this likeliness estimate we asked annotators to re-annotate the language tags of foreign words in predominantly native contexts. In 88{\%} of cases the annotators felt that the foreign language tag should be replaced by native language tag, thus indicating a huge scope for improvement of automatic language identification systems.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="patro-etal-2017-english">
<titleInfo>
<title>All that is English may be Hindi: Enhancing language identification through automatic ranking of the likeliness of word borrowing in social media</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jasabanta</namePart>
<namePart type="family">Patro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bidisha</namePart>
<namePart type="family">Samanta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saurabh</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abhipsa</namePart>
<namePart type="family">Basu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Prithwish</namePart>
<namePart type="family">Mukherjee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Monojit</namePart>
<namePart type="family">Choudhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Animesh</namePart>
<namePart type="family">Mukherjee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Martha</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rebecca</namePart>
<namePart type="family">Hwa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Riedel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Copenhagen, Denmark</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>n this paper, we present a set of computational methods to identify the likeliness of a word being borrowed, based on the signals from social media. In terms of Spearman’s correlation values, our methods perform more than two times better (∼ 0.62) in predicting the borrowing likeliness compared to the best performing baseline (∼ 0.26) reported in literature. Based on this likeliness estimate we asked annotators to re-annotate the language tags of foreign words in predominantly native contexts. In 88% of cases the annotators felt that the foreign language tag should be replaced by native language tag, thus indicating a huge scope for improvement of automatic language identification systems.</abstract>
<identifier type="citekey">patro-etal-2017-english</identifier>
<identifier type="doi">10.18653/v1/D17-1240</identifier>
<location>
<url>https://aclanthology.org/D17-1240</url>
</location>
<part>
<date>2017-09</date>
<extent unit="page">
<start>2264</start>
<end>2274</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T All that is English may be Hindi: Enhancing language identification through automatic ranking of the likeliness of word borrowing in social media
%A Patro, Jasabanta
%A Samanta, Bidisha
%A Singh, Saurabh
%A Basu, Abhipsa
%A Mukherjee, Prithwish
%A Choudhury, Monojit
%A Mukherjee, Animesh
%Y Palmer, Martha
%Y Hwa, Rebecca
%Y Riedel, Sebastian
%S Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing
%D 2017
%8 September
%I Association for Computational Linguistics
%C Copenhagen, Denmark
%F patro-etal-2017-english
%X n this paper, we present a set of computational methods to identify the likeliness of a word being borrowed, based on the signals from social media. In terms of Spearman’s correlation values, our methods perform more than two times better (∼ 0.62) in predicting the borrowing likeliness compared to the best performing baseline (∼ 0.26) reported in literature. Based on this likeliness estimate we asked annotators to re-annotate the language tags of foreign words in predominantly native contexts. In 88% of cases the annotators felt that the foreign language tag should be replaced by native language tag, thus indicating a huge scope for improvement of automatic language identification systems.
%R 10.18653/v1/D17-1240
%U https://aclanthology.org/D17-1240
%U https://doi.org/10.18653/v1/D17-1240
%P 2264-2274
Markdown (Informal)
[All that is English may be Hindi: Enhancing language identification through automatic ranking of the likeliness of word borrowing in social media](https://aclanthology.org/D17-1240) (Patro et al., EMNLP 2017)
ACL