@inproceedings{jabbari-etal-2012-developing,
title = "Developing an Open-domain {E}nglish-{F}arsi Translation System Using {AFEC}: Amirkabir Bilingual {F}arsi-{E}nglish Corpus",
author = "Jabbari, Fattaneh and
Bakshaei, Somayeh and
Mohammadzadeh Ziabary, Seyyed Mohammad and
Khadivi, Shahram",
editor = "Farghaly, Ali and
Oroumchian, Farhad",
booktitle = "Fourth Workshop on Computational Approaches to Arabic-Script-based Languages",
month = nov # " 1",
year = "2012",
address = "San Diego, California, USA",
publisher = "Association for Machine Translation in the Americas",
url = "https://aclanthology.org/2012.amta-caas14.3",
pages = "17--23",
abstract = "The translation quality of Statistical Machine Translation (SMT) depends on the amount of input data especially for morphologically rich languages. Farsi (Persian) language is such a language which has few NLP resources. It also suffers from the non-standard written characters which causes a large variety in the written form of each character. Moreover, the structural difference between Farsi and English results in long range reorderings which cannot be modeled by common SMT reordering models. Here, we try to improve the existing English-Farsi SMT system focusing on these challenges first by expanding our bilingual limited-domain corpus to an open-domain one. Then, to alleviate the character variations, a new text normalization algorithm is offered. Finally, some hand-crafted rules are applied to reduce the structural differences. Using the new corpus, the experimental results showed 8.82{\%} BLEU improvement by applying new normalization method and 9.1{\%} BLEU when rules are used.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jabbari-etal-2012-developing">
<titleInfo>
<title>Developing an Open-domain English-Farsi Translation System Using AFEC: Amirkabir Bilingual Farsi-English Corpus</title>
</titleInfo>
<name type="personal">
<namePart type="given">Fattaneh</namePart>
<namePart type="family">Jabbari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Somayeh</namePart>
<namePart type="family">Bakshaei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seyyed</namePart>
<namePart type="given">Mohammad</namePart>
<namePart type="family">Mohammadzadeh Ziabary</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shahram</namePart>
<namePart type="family">Khadivi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2012-11-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Fourth Workshop on Computational Approaches to Arabic-Script-based Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ali</namePart>
<namePart type="family">Farghaly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Farhad</namePart>
<namePart type="family">Oroumchian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Machine Translation in the Americas</publisher>
<place>
<placeTerm type="text">San Diego, California, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The translation quality of Statistical Machine Translation (SMT) depends on the amount of input data especially for morphologically rich languages. Farsi (Persian) language is such a language which has few NLP resources. It also suffers from the non-standard written characters which causes a large variety in the written form of each character. Moreover, the structural difference between Farsi and English results in long range reorderings which cannot be modeled by common SMT reordering models. Here, we try to improve the existing English-Farsi SMT system focusing on these challenges first by expanding our bilingual limited-domain corpus to an open-domain one. Then, to alleviate the character variations, a new text normalization algorithm is offered. Finally, some hand-crafted rules are applied to reduce the structural differences. Using the new corpus, the experimental results showed 8.82% BLEU improvement by applying new normalization method and 9.1% BLEU when rules are used.</abstract>
<identifier type="citekey">jabbari-etal-2012-developing</identifier>
<location>
<url>https://aclanthology.org/2012.amta-caas14.3</url>
</location>
<part>
<date>2012-11-01</date>
<extent unit="page">
<start>17</start>
<end>23</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Developing an Open-domain English-Farsi Translation System Using AFEC: Amirkabir Bilingual Farsi-English Corpus
%A Jabbari, Fattaneh
%A Bakshaei, Somayeh
%A Mohammadzadeh Ziabary, Seyyed Mohammad
%A Khadivi, Shahram
%Y Farghaly, Ali
%Y Oroumchian, Farhad
%S Fourth Workshop on Computational Approaches to Arabic-Script-based Languages
%D 2012
%8 nov 1
%I Association for Machine Translation in the Americas
%C San Diego, California, USA
%F jabbari-etal-2012-developing
%X The translation quality of Statistical Machine Translation (SMT) depends on the amount of input data especially for morphologically rich languages. Farsi (Persian) language is such a language which has few NLP resources. It also suffers from the non-standard written characters which causes a large variety in the written form of each character. Moreover, the structural difference between Farsi and English results in long range reorderings which cannot be modeled by common SMT reordering models. Here, we try to improve the existing English-Farsi SMT system focusing on these challenges first by expanding our bilingual limited-domain corpus to an open-domain one. Then, to alleviate the character variations, a new text normalization algorithm is offered. Finally, some hand-crafted rules are applied to reduce the structural differences. Using the new corpus, the experimental results showed 8.82% BLEU improvement by applying new normalization method and 9.1% BLEU when rules are used.
%U https://aclanthology.org/2012.amta-caas14.3
%P 17-23
Markdown (Informal)
[Developing an Open-domain English-Farsi Translation System Using AFEC: Amirkabir Bilingual Farsi-English Corpus](https://aclanthology.org/2012.amta-caas14.3) (Jabbari et al., AMTA 2012)
ACL