@inproceedings{lee-etal-2019-learning,
title = "Learning with Limited Data for Multilingual Reading Comprehension",
author = "Lee, Kyungjae and
Park, Sunghyun and
Han, Hojae and
Yeo, Jinyoung and
Hwang, Seung-won and
Lee, Juho",
editor = "Inui, Kentaro and
Jiang, Jing and
Ng, Vincent and
Wan, Xiaojun",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D19-1283",
doi = "10.18653/v1/D19-1283",
pages = "2840--2850",
abstract = "This paper studies the problem of supporting question answering in a new language with limited training resources. As an extreme scenario, when no such resource exists, one can (1) transfer labels from another language, and (2) generate labels from unlabeled data, using translator and automatic labeling function respectively. However, these approaches inevitably introduce noises to the training data, due to translation or generation errors, which require a judicious use of data with varying confidence. To address this challenge, we propose a weakly-supervised framework that quantifies such noises from automatically generated labels, to deemphasize or fix noisy data in training. On reading comprehension task, we demonstrate the effectiveness of our model on low-resource languages with varying similarity to English, namely, Korean and French.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lee-etal-2019-learning">
<titleInfo>
<title>Learning with Limited Data for Multilingual Reading Comprehension</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kyungjae</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sunghyun</namePart>
<namePart type="family">Park</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hojae</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jinyoung</namePart>
<namePart type="family">Yeo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seung-won</namePart>
<namePart type="family">Hwang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juho</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jing</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vincent</namePart>
<namePart type="family">Ng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaojun</namePart>
<namePart type="family">Wan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper studies the problem of supporting question answering in a new language with limited training resources. As an extreme scenario, when no such resource exists, one can (1) transfer labels from another language, and (2) generate labels from unlabeled data, using translator and automatic labeling function respectively. However, these approaches inevitably introduce noises to the training data, due to translation or generation errors, which require a judicious use of data with varying confidence. To address this challenge, we propose a weakly-supervised framework that quantifies such noises from automatically generated labels, to deemphasize or fix noisy data in training. On reading comprehension task, we demonstrate the effectiveness of our model on low-resource languages with varying similarity to English, namely, Korean and French.</abstract>
<identifier type="citekey">lee-etal-2019-learning</identifier>
<identifier type="doi">10.18653/v1/D19-1283</identifier>
<location>
<url>https://aclanthology.org/D19-1283</url>
</location>
<part>
<date>2019-11</date>
<extent unit="page">
<start>2840</start>
<end>2850</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Learning with Limited Data for Multilingual Reading Comprehension
%A Lee, Kyungjae
%A Park, Sunghyun
%A Han, Hojae
%A Yeo, Jinyoung
%A Hwang, Seung-won
%A Lee, Juho
%Y Inui, Kentaro
%Y Jiang, Jing
%Y Ng, Vincent
%Y Wan, Xiaojun
%S Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong, China
%F lee-etal-2019-learning
%X This paper studies the problem of supporting question answering in a new language with limited training resources. As an extreme scenario, when no such resource exists, one can (1) transfer labels from another language, and (2) generate labels from unlabeled data, using translator and automatic labeling function respectively. However, these approaches inevitably introduce noises to the training data, due to translation or generation errors, which require a judicious use of data with varying confidence. To address this challenge, we propose a weakly-supervised framework that quantifies such noises from automatically generated labels, to deemphasize or fix noisy data in training. On reading comprehension task, we demonstrate the effectiveness of our model on low-resource languages with varying similarity to English, namely, Korean and French.
%R 10.18653/v1/D19-1283
%U https://aclanthology.org/D19-1283
%U https://doi.org/10.18653/v1/D19-1283
%P 2840-2850
Markdown (Informal)
[Learning with Limited Data for Multilingual Reading Comprehension](https://aclanthology.org/D19-1283) (Lee et al., EMNLP-IJCNLP 2019)
ACL
- Kyungjae Lee, Sunghyun Park, Hojae Han, Jinyoung Yeo, Seung-won Hwang, and Juho Lee. 2019. Learning with Limited Data for Multilingual Reading Comprehension. In Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), pages 2840–2850, Hong Kong, China. Association for Computational Linguistics.