@inproceedings{tavernier-etal-2008-holy,
title = "Holy {M}oses! Leveraging Existing Tools and Resources for Entity Translation",
author = "Tavernier, Jean and
Cowan, Rosa and
Vanni, Michelle",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Tapias, Daniel",
booktitle = "Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)",
month = may,
year = "2008",
address = "Marrakech, Morocco",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L08-1580/",
abstract = "Recently, there has been an emphasis on creating shared resources for natural language processing applications. This has resulted in the development of high-quality tools and data, which can then be leveraged by the research community as components for novel systems. In this paper, we reuse an open source machine translation framework to create an Arabic-to-English entity translation system. The system first translates known entity mentions using a standard phrase-based statistical machine translation framework, which is then reused to perform name transliteration on unknown mentions. In order to transliterate names more accurately, we introduce an algorithm to augment a names database with name origin and frequency information from existing data resources. Origin information is used to learn name origin classifiers and origin-specific transliteration models, while frequency information is used to select amongst n-best transliteration candidates. This work demonstrates the feasibility and benefit of adapting such data resources and shows how off-the-shelf tools and data resources can be repurposed to rapidly create a system outside their original domain."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tavernier-etal-2008-holy">
<titleInfo>
<title>Holy Moses! Leveraging Existing Tools and Resources for Entity Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jean</namePart>
<namePart type="family">Tavernier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rosa</namePart>
<namePart type="family">Cowan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michelle</namePart>
<namePart type="family">Vanni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2008-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Marrakech, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recently, there has been an emphasis on creating shared resources for natural language processing applications. This has resulted in the development of high-quality tools and data, which can then be leveraged by the research community as components for novel systems. In this paper, we reuse an open source machine translation framework to create an Arabic-to-English entity translation system. The system first translates known entity mentions using a standard phrase-based statistical machine translation framework, which is then reused to perform name transliteration on unknown mentions. In order to transliterate names more accurately, we introduce an algorithm to augment a names database with name origin and frequency information from existing data resources. Origin information is used to learn name origin classifiers and origin-specific transliteration models, while frequency information is used to select amongst n-best transliteration candidates. This work demonstrates the feasibility and benefit of adapting such data resources and shows how off-the-shelf tools and data resources can be repurposed to rapidly create a system outside their original domain.</abstract>
<identifier type="citekey">tavernier-etal-2008-holy</identifier>
<location>
<url>https://aclanthology.org/L08-1580/</url>
</location>
<part>
<date>2008-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Holy Moses! Leveraging Existing Tools and Resources for Entity Translation
%A Tavernier, Jean
%A Cowan, Rosa
%A Vanni, Michelle
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Tapias, Daniel
%S Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)
%D 2008
%8 May
%I European Language Resources Association (ELRA)
%C Marrakech, Morocco
%F tavernier-etal-2008-holy
%X Recently, there has been an emphasis on creating shared resources for natural language processing applications. This has resulted in the development of high-quality tools and data, which can then be leveraged by the research community as components for novel systems. In this paper, we reuse an open source machine translation framework to create an Arabic-to-English entity translation system. The system first translates known entity mentions using a standard phrase-based statistical machine translation framework, which is then reused to perform name transliteration on unknown mentions. In order to transliterate names more accurately, we introduce an algorithm to augment a names database with name origin and frequency information from existing data resources. Origin information is used to learn name origin classifiers and origin-specific transliteration models, while frequency information is used to select amongst n-best transliteration candidates. This work demonstrates the feasibility and benefit of adapting such data resources and shows how off-the-shelf tools and data resources can be repurposed to rapidly create a system outside their original domain.
%U https://aclanthology.org/L08-1580/
Markdown (Informal)
[Holy Moses! Leveraging Existing Tools and Resources for Entity Translation](https://aclanthology.org/L08-1580/) (Tavernier et al., LREC 2008)
ACL