@inproceedings{chandu-etal-2018-language,
title = "Language Informed Modeling of Code-Switched Text",
author = "Chandu, Khyathi and
Manzini, Thomas and
Singh, Sumeet and
Black, Alan W.",
editor = "Aguilar, Gustavo and
AlGhamdi, Fahad and
Soto, Victor and
Solorio, Thamar and
Diab, Mona and
Hirschberg, Julia",
booktitle = "Proceedings of the Third Workshop on Computational Approaches to Linguistic Code-Switching",
month = jul,
year = "2018",
address = "Melbourne, Australia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-3211/",
doi = "10.18653/v1/W18-3211",
pages = "92--97",
abstract = "Code-switching (CS), the practice of alternating between two or more languages in conversations, is pervasive in most multi-lingual communities. CS texts have a complex interplay between languages and occur in informal contexts that make them harder to collect and construct NLP tools for. We approach this problem through Language Modeling (LM) on a new Hindi-English mixed corpus containing 59,189 unique sentences collected from blogging websites. We implement and discuss different Language Models derived from a multi-layered LSTM architecture. We hypothesize that encoding language information strengthens a language model by helping to learn code-switching points. We show that our highest performing model achieves a test perplexity of 19.52 on the CS corpus that we collected and processed. On this data we demonstrate that our performance is an improvement over AWD-LSTM LM (a recent state of the art on monolingual English)."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chandu-etal-2018-language">
<titleInfo>
<title>Language Informed Modeling of Code-Switched Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">Khyathi</namePart>
<namePart type="family">Chandu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thomas</namePart>
<namePart type="family">Manzini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sumeet</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="given">W</namePart>
<namePart type="family">Black</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on Computational Approaches to Linguistic Code-Switching</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gustavo</namePart>
<namePart type="family">Aguilar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fahad</namePart>
<namePart type="family">AlGhamdi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Victor</namePart>
<namePart type="family">Soto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thamar</namePart>
<namePart type="family">Solorio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mona</namePart>
<namePart type="family">Diab</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Hirschberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Melbourne, Australia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Code-switching (CS), the practice of alternating between two or more languages in conversations, is pervasive in most multi-lingual communities. CS texts have a complex interplay between languages and occur in informal contexts that make them harder to collect and construct NLP tools for. We approach this problem through Language Modeling (LM) on a new Hindi-English mixed corpus containing 59,189 unique sentences collected from blogging websites. We implement and discuss different Language Models derived from a multi-layered LSTM architecture. We hypothesize that encoding language information strengthens a language model by helping to learn code-switching points. We show that our highest performing model achieves a test perplexity of 19.52 on the CS corpus that we collected and processed. On this data we demonstrate that our performance is an improvement over AWD-LSTM LM (a recent state of the art on monolingual English).</abstract>
<identifier type="citekey">chandu-etal-2018-language</identifier>
<identifier type="doi">10.18653/v1/W18-3211</identifier>
<location>
<url>https://aclanthology.org/W18-3211/</url>
</location>
<part>
<date>2018-07</date>
<extent unit="page">
<start>92</start>
<end>97</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Language Informed Modeling of Code-Switched Text
%A Chandu, Khyathi
%A Manzini, Thomas
%A Singh, Sumeet
%A Black, Alan W.
%Y Aguilar, Gustavo
%Y AlGhamdi, Fahad
%Y Soto, Victor
%Y Solorio, Thamar
%Y Diab, Mona
%Y Hirschberg, Julia
%S Proceedings of the Third Workshop on Computational Approaches to Linguistic Code-Switching
%D 2018
%8 July
%I Association for Computational Linguistics
%C Melbourne, Australia
%F chandu-etal-2018-language
%X Code-switching (CS), the practice of alternating between two or more languages in conversations, is pervasive in most multi-lingual communities. CS texts have a complex interplay between languages and occur in informal contexts that make them harder to collect and construct NLP tools for. We approach this problem through Language Modeling (LM) on a new Hindi-English mixed corpus containing 59,189 unique sentences collected from blogging websites. We implement and discuss different Language Models derived from a multi-layered LSTM architecture. We hypothesize that encoding language information strengthens a language model by helping to learn code-switching points. We show that our highest performing model achieves a test perplexity of 19.52 on the CS corpus that we collected and processed. On this data we demonstrate that our performance is an improvement over AWD-LSTM LM (a recent state of the art on monolingual English).
%R 10.18653/v1/W18-3211
%U https://aclanthology.org/W18-3211/
%U https://doi.org/10.18653/v1/W18-3211
%P 92-97
Markdown (Informal)
[Language Informed Modeling of Code-Switched Text](https://aclanthology.org/W18-3211/) (Chandu et al., ACL 2018)
ACL
- Khyathi Chandu, Thomas Manzini, Sumeet Singh, and Alan W. Black. 2018. Language Informed Modeling of Code-Switched Text. In Proceedings of the Third Workshop on Computational Approaches to Linguistic Code-Switching, pages 92–97, Melbourne, Australia. Association for Computational Linguistics.