% Patro et al., EMNLP 2017 (ACL Anthology D17-1240). Proper nouns in the title are brace-protected as whole words.
@inproceedings{patro-etal-2017-english,
    title     = {All that is {English} may be {Hindi}: Enhancing language identification through automatic ranking of the likeliness of word borrowing in social media},
    author    = {Patro, Jasabanta and
                 Samanta, Bidisha and
                 Singh, Saurabh and
                 Basu, Abhipsa and
                 Mukherjee, Prithwish and
                 Choudhury, Monojit and
                 Mukherjee, Animesh},
    editor    = {Palmer, Martha and
                 Hwa, Rebecca and
                 Riedel, Sebastian},
    booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing},
    month     = sep,
    year      = {2017},
    address   = {Copenhagen, Denmark},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/D17-1240/},
    doi       = {10.18653/v1/D17-1240},
    pages     = {2264--2274},
    abstract  = {In this paper, we present a set of computational methods to identify the likeliness of a word being borrowed, based on the signals from social media. In terms of Spearman's correlation values, our methods perform more than two times better ({\ensuremath{\sim}} 0.62) in predicting the borrowing likeliness compared to the best performing baseline ({\ensuremath{\sim}} 0.26) reported in literature. Based on this likeliness estimate we asked annotators to re-annotate the language tags of foreign words in predominantly native contexts. In 88{\%} of cases the annotators felt that the foreign language tag should be replaced by native language tag, thus indicating a huge scope for improvement of automatic language identification systems.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="patro-etal-2017-english">
<titleInfo>
<title>All that is English may be Hindi: Enhancing language identification through automatic ranking of the likeliness of word borrowing in social media</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jasabanta</namePart>
<namePart type="family">Patro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bidisha</namePart>
<namePart type="family">Samanta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saurabh</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abhipsa</namePart>
<namePart type="family">Basu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Prithwish</namePart>
<namePart type="family">Mukherjee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Monojit</namePart>
<namePart type="family">Choudhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Animesh</namePart>
<namePart type="family">Mukherjee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Martha</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rebecca</namePart>
<namePart type="family">Hwa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Riedel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Copenhagen, Denmark</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we present a set of computational methods to identify the likeliness of a word being borrowed, based on the signals from social media. In terms of Spearman’s correlation values, our methods perform more than two times better (∼ 0.62) in predicting the borrowing likeliness compared to the best performing baseline (∼ 0.26) reported in literature. Based on this likeliness estimate we asked annotators to re-annotate the language tags of foreign words in predominantly native contexts. In 88% of cases the annotators felt that the foreign language tag should be replaced by native language tag, thus indicating a huge scope for improvement of automatic language identification systems.</abstract>
<identifier type="citekey">patro-etal-2017-english</identifier>
<identifier type="doi">10.18653/v1/D17-1240</identifier>
<location>
<url>https://aclanthology.org/D17-1240/</url>
</location>
<part>
<date>2017-09</date>
<extent unit="page">
<start>2264</start>
<end>2274</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T All that is English may be Hindi: Enhancing language identification through automatic ranking of the likeliness of word borrowing in social media
%A Patro, Jasabanta
%A Samanta, Bidisha
%A Singh, Saurabh
%A Basu, Abhipsa
%A Mukherjee, Prithwish
%A Choudhury, Monojit
%A Mukherjee, Animesh
%Y Palmer, Martha
%Y Hwa, Rebecca
%Y Riedel, Sebastian
%S Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing
%D 2017
%8 September
%I Association for Computational Linguistics
%C Copenhagen, Denmark
%F patro-etal-2017-english
%X In this paper, we present a set of computational methods to identify the likeliness of a word being borrowed, based on the signals from social media. In terms of Spearman’s correlation values, our methods perform more than two times better (∼ 0.62) in predicting the borrowing likeliness compared to the best performing baseline (∼ 0.26) reported in literature. Based on this likeliness estimate we asked annotators to re-annotate the language tags of foreign words in predominantly native contexts. In 88% of cases the annotators felt that the foreign language tag should be replaced by native language tag, thus indicating a huge scope for improvement of automatic language identification systems.
%R 10.18653/v1/D17-1240
%U https://aclanthology.org/D17-1240/
%U https://doi.org/10.18653/v1/D17-1240
%P 2264-2274
Markdown (Informal)
[All that is English may be Hindi: Enhancing language identification through automatic ranking of the likeliness of word borrowing in social media](https://aclanthology.org/D17-1240/) (Patro et al., EMNLP 2017)
ACL