@inproceedings{gupta-etal-2018-transliteration,
title = "Transliteration Better than Translation? Answering Code-mixed Questions over a Knowledge Base",
author = "Gupta, Vishal and
Chinnakotla, Manoj and
Shrivastava, Manish",
editor = "Aguilar, Gustavo and
AlGhamdi, Fahad and
Soto, Victor and
Solorio, Thamar and
Diab, Mona and
Hirschberg, Julia",
booktitle = "Proceedings of the Third Workshop on Computational Approaches to Linguistic Code-Switching",
month = jul,
year = "2018",
address = "Melbourne, Australia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-3205/",
doi = "10.18653/v1/W18-3205",
pages = "39--50",
abstract = "Humans can learn multiple languages. If they know a fact in one language, they can answer a question in another language they understand. They can also answer Code-mix (CM) questions: questions which contain both languages. This behavior is attributed to the unique learning ability of humans. Our task aims to study if machines can achieve this. We demonstrate how effectively a machine can answer CM questions. In this work, we adopt a two phase approach: candidate generation and candidate re-ranking to answer questions. We propose a Triplet-Siamese-Hybrid CNN (TSHCNN) to re-rank candidate answers. We show experiments on the SimpleQuestions dataset. Our network is trained only on English questions provided in this dataset and noisy Hindi translations of these questions and can answer English-Hindi CM questions effectively without the need of translation into English. Back-transliterated CM questions outperform their lexical and sentence level translated counterparts by 5{\%} {\&} 35{\%} in accuracy respectively, highlighting the efficacy of our approach in a resource constrained setting."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gupta-etal-2018-transliteration">
<titleInfo>
<title>Transliteration Better than Translation? Answering Code-mixed Questions over a Knowledge Base</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vishal</namePart>
<namePart type="family">Gupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manoj</namePart>
<namePart type="family">Chinnakotla</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manish</namePart>
<namePart type="family">Shrivastava</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on Computational Approaches to Linguistic Code-Switching</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gustavo</namePart>
<namePart type="family">Aguilar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fahad</namePart>
<namePart type="family">AlGhamdi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Victor</namePart>
<namePart type="family">Soto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thamar</namePart>
<namePart type="family">Solorio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mona</namePart>
<namePart type="family">Diab</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Hirschberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Melbourne, Australia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Humans can learn multiple languages. If they know a fact in one language, they can answer a question in another language they understand. They can also answer Code-mix (CM) questions: questions which contain both languages. This behavior is attributed to the unique learning ability of humans. Our task aims to study if machines can achieve this. We demonstrate how effectively a machine can answer CM questions. In this work, we adopt a two phase approach: candidate generation and candidate re-ranking to answer questions. We propose a Triplet-Siamese-Hybrid CNN (TSHCNN) to re-rank candidate answers. We show experiments on the SimpleQuestions dataset. Our network is trained only on English questions provided in this dataset and noisy Hindi translations of these questions and can answer English-Hindi CM questions effectively without the need of translation into English. Back-transliterated CM questions outperform their lexical and sentence level translated counterparts by 5% &amp; 35% in accuracy respectively, highlighting the efficacy of our approach in a resource constrained setting.</abstract>
<identifier type="citekey">gupta-etal-2018-transliteration</identifier>
<identifier type="doi">10.18653/v1/W18-3205</identifier>
<location>
<url>https://aclanthology.org/W18-3205/</url>
</location>
<part>
<date>2018-07</date>
<extent unit="page">
<start>39</start>
<end>50</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Transliteration Better than Translation? Answering Code-mixed Questions over a Knowledge Base
%A Gupta, Vishal
%A Chinnakotla, Manoj
%A Shrivastava, Manish
%Y Aguilar, Gustavo
%Y AlGhamdi, Fahad
%Y Soto, Victor
%Y Solorio, Thamar
%Y Diab, Mona
%Y Hirschberg, Julia
%S Proceedings of the Third Workshop on Computational Approaches to Linguistic Code-Switching
%D 2018
%8 July
%I Association for Computational Linguistics
%C Melbourne, Australia
%F gupta-etal-2018-transliteration
%X Humans can learn multiple languages. If they know a fact in one language, they can answer a question in another language they understand. They can also answer Code-mix (CM) questions: questions which contain both languages. This behavior is attributed to the unique learning ability of humans. Our task aims to study if machines can achieve this. We demonstrate how effectively a machine can answer CM questions. In this work, we adopt a two phase approach: candidate generation and candidate re-ranking to answer questions. We propose a Triplet-Siamese-Hybrid CNN (TSHCNN) to re-rank candidate answers. We show experiments on the SimpleQuestions dataset. Our network is trained only on English questions provided in this dataset and noisy Hindi translations of these questions and can answer English-Hindi CM questions effectively without the need of translation into English. Back-transliterated CM questions outperform their lexical and sentence level translated counterparts by 5% & 35% in accuracy respectively, highlighting the efficacy of our approach in a resource constrained setting.
%R 10.18653/v1/W18-3205
%U https://aclanthology.org/W18-3205/
%U https://doi.org/10.18653/v1/W18-3205
%P 39-50
Markdown (Informal)
[Transliteration Better than Translation? Answering Code-mixed Questions over a Knowledge Base](https://aclanthology.org/W18-3205/) (Gupta et al., ACL 2018)
ACL