@inproceedings{winata-etal-2018-code,
title = "Code-Switching Language Modeling using Syntax-Aware Multi-Task Learning",
author = "Winata, Genta Indra and
Madotto, Andrea and
Wu, Chien-Sheng and
Fung, Pascale",
editor = "Aguilar, Gustavo and
AlGhamdi, Fahad and
Soto, Victor and
Solorio, Thamar and
Diab, Mona and
Hirschberg, Julia",
booktitle = "Proceedings of the Third Workshop on Computational Approaches to Linguistic Code-Switching",
month = jul,
year = "2018",
address = "Melbourne, Australia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-3207",
doi = "10.18653/v1/W18-3207",
pages = "62--67",
abstract = "Lack of text data has been the major issue on code-switching language modeling. In this paper, we introduce multi-task learning based language model which shares syntax representation of languages to leverage linguistic information and tackle the low resource data issue. Our model jointly learns both language modeling and Part-of-Speech tagging on code-switched utterances. In this way, the model is able to identify the location of code-switching points and improves the prediction of next word. Our approach outperforms standard LSTM based language model, with an improvement of 9.7{\%} and 7.4{\%} in perplexity on SEAME Phase I and Phase II dataset respectively.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="winata-etal-2018-code">
<titleInfo>
<title>Code-Switching Language Modeling using Syntax-Aware Multi-Task Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Genta</namePart>
<namePart type="given">Indra</namePart>
<namePart type="family">Winata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrea</namePart>
<namePart type="family">Madotto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chien-Sheng</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pascale</namePart>
<namePart type="family">Fung</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on Computational Approaches to Linguistic Code-Switching</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gustavo</namePart>
<namePart type="family">Aguilar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fahad</namePart>
<namePart type="family">AlGhamdi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Victor</namePart>
<namePart type="family">Soto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thamar</namePart>
<namePart type="family">Solorio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mona</namePart>
<namePart type="family">Diab</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Hirschberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Melbourne, Australia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Lack of text data has been the major issue on code-switching language modeling. In this paper, we introduce multi-task learning based language model which shares syntax representation of languages to leverage linguistic information and tackle the low resource data issue. Our model jointly learns both language modeling and Part-of-Speech tagging on code-switched utterances. In this way, the model is able to identify the location of code-switching points and improves the prediction of next word. Our approach outperforms standard LSTM based language model, with an improvement of 9.7% and 7.4% in perplexity on SEAME Phase I and Phase II dataset respectively.</abstract>
<identifier type="citekey">winata-etal-2018-code</identifier>
<identifier type="doi">10.18653/v1/W18-3207</identifier>
<location>
<url>https://aclanthology.org/W18-3207</url>
</location>
<part>
<date>2018-07</date>
<extent unit="page">
<start>62</start>
<end>67</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Code-Switching Language Modeling using Syntax-Aware Multi-Task Learning
%A Winata, Genta Indra
%A Madotto, Andrea
%A Wu, Chien-Sheng
%A Fung, Pascale
%Y Aguilar, Gustavo
%Y AlGhamdi, Fahad
%Y Soto, Victor
%Y Solorio, Thamar
%Y Diab, Mona
%Y Hirschberg, Julia
%S Proceedings of the Third Workshop on Computational Approaches to Linguistic Code-Switching
%D 2018
%8 July
%I Association for Computational Linguistics
%C Melbourne, Australia
%F winata-etal-2018-code
%X Lack of text data has been the major issue on code-switching language modeling. In this paper, we introduce multi-task learning based language model which shares syntax representation of languages to leverage linguistic information and tackle the low resource data issue. Our model jointly learns both language modeling and Part-of-Speech tagging on code-switched utterances. In this way, the model is able to identify the location of code-switching points and improves the prediction of next word. Our approach outperforms standard LSTM based language model, with an improvement of 9.7% and 7.4% in perplexity on SEAME Phase I and Phase II dataset respectively.
%R 10.18653/v1/W18-3207
%U https://aclanthology.org/W18-3207
%U https://doi.org/10.18653/v1/W18-3207
%P 62-67
Markdown (Informal)
[Code-Switching Language Modeling using Syntax-Aware Multi-Task Learning](https://aclanthology.org/W18-3207) (Winata et al., ACL 2018)
ACL