@inproceedings{upadhyay-etal-2018-bootstrapping,
title = "Bootstrapping Transliteration with Constrained Discovery for Low-Resource Languages",
author = "Upadhyay, Shyam and
Kodner, Jordan and
Roth, Dan",
editor = "Riloff, Ellen and
Chiang, David and
Hockenmaier, Julia and
Tsujii, Jun{'}ichi",
booktitle = "Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing",
month = oct # "-" # nov,
year = "2018",
address = "Brussels, Belgium",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D18-1046/",
doi = "10.18653/v1/D18-1046",
pages = "501--511",
abstract = "Generating the English transliteration of a name written in a foreign script is an important and challenging step in multilingual knowledge acquisition and information extraction. Existing approaches to transliteration generation require a large ({\ensuremath{>}}5000) number of training examples. This difficulty contrasts with transliteration discovery, a somewhat easier task that involves picking a plausible transliteration from a given list. In this work, we present a bootstrapping algorithm that uses constrained discovery to improve generation, and can be used with as few as 500 training examples, which we show can be sourced from annotators in a matter of hours. This opens the task to languages for which large number of training examples are unavailable. We evaluate transliteration generation performance itself, as well the improvement it brings to cross-lingual candidate generation for entity linking, a typical downstream task. We present a comprehensive evaluation of our approach on nine languages, each written in a unique script."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="upadhyay-etal-2018-bootstrapping">
<titleInfo>
<title>Bootstrapping Transliteration with Constrained Discovery for Low-Resource Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shyam</namePart>
<namePart type="family">Upadhyay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Kodner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Roth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-oct-nov</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ellen</namePart>
<namePart type="family">Riloff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Chiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Hockenmaier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jun’ichi</namePart>
<namePart type="family">Tsujii</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Brussels, Belgium</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Generating the English transliteration of a name written in a foreign script is an important and challenging step in multilingual knowledge acquisition and information extraction. Existing approaches to transliteration generation require a large (\ensuremath>5000) number of training examples. This difficulty contrasts with transliteration discovery, a somewhat easier task that involves picking a plausible transliteration from a given list. In this work, we present a bootstrapping algorithm that uses constrained discovery to improve generation, and can be used with as few as 500 training examples, which we show can be sourced from annotators in a matter of hours. This opens the task to languages for which large number of training examples are unavailable. We evaluate transliteration generation performance itself, as well the improvement it brings to cross-lingual candidate generation for entity linking, a typical downstream task. We present a comprehensive evaluation of our approach on nine languages, each written in a unique script.</abstract>
<identifier type="citekey">upadhyay-etal-2018-bootstrapping</identifier>
<identifier type="doi">10.18653/v1/D18-1046</identifier>
<location>
<url>https://aclanthology.org/D18-1046/</url>
</location>
<part>
<date>2018-oct-nov</date>
<extent unit="page">
<start>501</start>
<end>511</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Bootstrapping Transliteration with Constrained Discovery for Low-Resource Languages
%A Upadhyay, Shyam
%A Kodner, Jordan
%A Roth, Dan
%Y Riloff, Ellen
%Y Chiang, David
%Y Hockenmaier, Julia
%Y Tsujii, Jun’ichi
%S Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing
%D 2018
%8 oct nov
%I Association for Computational Linguistics
%C Brussels, Belgium
%F upadhyay-etal-2018-bootstrapping
%X Generating the English transliteration of a name written in a foreign script is an important and challenging step in multilingual knowledge acquisition and information extraction. Existing approaches to transliteration generation require a large (\ensuremath>5000) number of training examples. This difficulty contrasts with transliteration discovery, a somewhat easier task that involves picking a plausible transliteration from a given list. In this work, we present a bootstrapping algorithm that uses constrained discovery to improve generation, and can be used with as few as 500 training examples, which we show can be sourced from annotators in a matter of hours. This opens the task to languages for which large number of training examples are unavailable. We evaluate transliteration generation performance itself, as well the improvement it brings to cross-lingual candidate generation for entity linking, a typical downstream task. We present a comprehensive evaluation of our approach on nine languages, each written in a unique script.
%R 10.18653/v1/D18-1046
%U https://aclanthology.org/D18-1046/
%U https://doi.org/10.18653/v1/D18-1046
%P 501-511
Markdown (Informal)
[Bootstrapping Transliteration with Constrained Discovery for Low-Resource Languages](https://aclanthology.org/D18-1046/) (Upadhyay et al., EMNLP 2018)
ACL