@inproceedings{mansour-ney-2012-arabic,
title = "{A}rabic-Segmentation Combination Strategies for Statistical Machine Translation",
author = "Mansour, Saab and
Ney, Hermann",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Do{\u{g}}an, Mehmet U{\u{g}}ur and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12)",
month = may,
year = "2012",
address = "Istanbul, Turkey",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2012/pdf/509_Paper.pdf",
pages = "3915--3920",
abstract = "Arabic segmentation was already applied successfully for the task of statistical machine translation (SMT). Yet, there is no consistent comparison of the effect of different techniques and methods over the final translation quality. In this work, we use existing tools and further re-implement and develop new methods for segmentation. We compare the resulting SMT systems based on the different segmentation methods over the small IWSLT 2010 BTEC and the large NIST 2009 Arabic-to-English translation tasks. Our results show that for both small and large training data, segmentation yields strong improvements, but, the differences between the top ranked segmenters are statistically insignificant. Due to the different methodologies that we apply for segmentation, we expect a complimentary variation in the results achieved by each method. As done in previous work, we combine several segmentation schemes of the same model but achieve modest improvements. Next, we try a different strategy, where we combine the different segmentation methods rather than the different segmentation schemes. In this case, we achieve stronger improvements over the best single system. Finally, combining schemes and methods has another slight gain over the best combination strategy.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mansour-ney-2012-arabic">
<titleInfo>
<title>Arabic-Segmentation Combination Strategies for Statistical Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saab</namePart>
<namePart type="family">Mansour</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hermann</namePart>
<namePart type="family">Ney</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2012-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mehmet</namePart>
<namePart type="given">Uğur</namePart>
<namePart type="family">Doğan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Istanbul, Turkey</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Arabic segmentation was already applied successfully for the task of statistical machine translation (SMT). Yet, there is no consistent comparison of the effect of different techniques and methods over the final translation quality. In this work, we use existing tools and further re-implement and develop new methods for segmentation. We compare the resulting SMT systems based on the different segmentation methods over the small IWSLT 2010 BTEC and the large NIST 2009 Arabic-to-English translation tasks. Our results show that for both small and large training data, segmentation yields strong improvements, but, the differences between the top ranked segmenters are statistically insignificant. Due to the different methodologies that we apply for segmentation, we expect a complimentary variation in the results achieved by each method. As done in previous work, we combine several segmentation schemes of the same model but achieve modest improvements. Next, we try a different strategy, where we combine the different segmentation methods rather than the different segmentation schemes. In this case, we achieve stronger improvements over the best single system. Finally, combining schemes and methods has another slight gain over the best combination strategy.</abstract>
<identifier type="citekey">mansour-ney-2012-arabic</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/509_Paper.pdf</url>
</location>
<part>
<date>2012-05</date>
<extent unit="page">
<start>3915</start>
<end>3920</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Arabic-Segmentation Combination Strategies for Statistical Machine Translation
%A Mansour, Saab
%A Ney, Hermann
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Doğan, Mehmet Uğur
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)
%D 2012
%8 May
%I European Language Resources Association (ELRA)
%C Istanbul, Turkey
%F mansour-ney-2012-arabic
%X Arabic segmentation was already applied successfully for the task of statistical machine translation (SMT). Yet, there is no consistent comparison of the effect of different techniques and methods over the final translation quality. In this work, we use existing tools and further re-implement and develop new methods for segmentation. We compare the resulting SMT systems based on the different segmentation methods over the small IWSLT 2010 BTEC and the large NIST 2009 Arabic-to-English translation tasks. Our results show that for both small and large training data, segmentation yields strong improvements, but, the differences between the top ranked segmenters are statistically insignificant. Due to the different methodologies that we apply for segmentation, we expect a complimentary variation in the results achieved by each method. As done in previous work, we combine several segmentation schemes of the same model but achieve modest improvements. Next, we try a different strategy, where we combine the different segmentation methods rather than the different segmentation schemes. In this case, we achieve stronger improvements over the best single system. Finally, combining schemes and methods has another slight gain over the best combination strategy.
%U http://www.lrec-conf.org/proceedings/lrec2012/pdf/509_Paper.pdf
%P 3915-3920
Markdown (Informal)
[Arabic-Segmentation Combination Strategies for Statistical Machine Translation](http://www.lrec-conf.org/proceedings/lrec2012/pdf/509_Paper.pdf) (Mansour & Ney, LREC 2012)
ACL