@inproceedings{alshaikh-saleh-ahmad-2026-seeing,
    title = "Seeing Words Differently: Visual Embeddings for Robust {E}nglish-{A}rabic Machine Translation",
    author = "Alshaikh Saleh, Mahdi and
      Ahmad, Irfan",
    booktitle = "Proceedings of the 2nd Workshop on {NLP} for Languages Using {A}rabic Script",
    month = mar,
    year = "2026",
    address = "Rabat, Morocco",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2026.abjadnlp-1.9/",
    pages = "66--74",
    abstract = "Context: Natural Language Processing (NLP) has become an essential field with widespread applications, including Large Language Models (LLMs). One of the core applications of NLP is machine translation (MT). A major challenge in MT is handling out-of-vocabulary (OOV) words and spelling mistakes, which can lead to poor translation quality. Objective: This study compares traditional text-based embeddings with visual embeddings for English-to-Arabic translation. It investigates the effectiveness of each approach, especially in handling noisy inputs and OOV terms. Method: Using the IWSLT 2017 English-Arabic dataset, we trained a baseline transformer encoder-decoder model with standard text embeddings and compared it against models using several visual embedding strategies, including vowel-removal preprocessing and trigram-based image rendering. Translated outputs were evaluated with BLEU scores. Results: Although traditional BPE-based models achieve higher BLEU on clean data, visual embedding models are substantially more robust to spelling noise, achieving up to 2.4{\texttimes} higher BLEU scores at 50{\%} character corruption."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="alshaikh-saleh-ahmad-2026-seeing">
    <titleInfo>
      <title>Seeing Words Differently: Visual Embeddings for Robust English-Arabic Machine Translation</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Mahdi</namePart>
      <namePart type="family">Alshaikh Saleh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Irfan</namePart>
      <namePart type="family">Ahmad</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2nd Workshop on NLP for Languages Using Arabic Script</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Rabat, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Context: Natural Language Processing (NLP) has become an essential field with widespread applications, including Large Language Models (LLMs). One of the core applications of NLP is machine translation (MT). A major challenge in MT is handling out-of-vocabulary (OOV) words and spelling mistakes, which can lead to poor translation quality. Objective: This study compares traditional text-based embeddings with visual embeddings for English-to-Arabic translation. It investigates the effectiveness of each approach, especially in handling noisy inputs and OOV terms. Method: Using the IWSLT 2017 English-Arabic dataset, we trained a baseline transformer encoder-decoder model with standard text embeddings and compared it against models using several visual embedding strategies, including vowel-removal preprocessing and trigram-based image rendering. Translated outputs were evaluated with BLEU scores. Results: Although traditional BPE-based models achieve higher BLEU on clean data, visual embedding models are substantially more robust to spelling noise, achieving up to 2.4× higher BLEU scores at 50% character corruption.</abstract>
    <identifier type="citekey">alshaikh-saleh-ahmad-2026-seeing</identifier>
    <location>
      <url>https://aclanthology.org/2026.abjadnlp-1.9/</url>
    </location>
    <part>
      <date>2026-03</date>
      <extent unit="page">
        <start>66</start>
        <end>74</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Seeing Words Differently: Visual Embeddings for Robust English-Arabic Machine Translation
%A Alshaikh Saleh, Mahdi
%A Ahmad, Irfan
%S Proceedings of the 2nd Workshop on NLP for Languages Using Arabic Script
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%F alshaikh-saleh-ahmad-2026-seeing
%X Context: Natural Language Processing (NLP) has become an essential field with widespread applications, including Large Language Models (LLMs). One of the core applications of NLP is machine translation (MT). A major challenge in MT is handling out-of-vocabulary (OOV) words and spelling mistakes, which can lead to poor translation quality. Objective: This study compares traditional text-based embeddings with visual embeddings for English-to-Arabic translation. It investigates the effectiveness of each approach, especially in handling noisy inputs and OOV terms. Method: Using the IWSLT 2017 English-Arabic dataset, we trained a baseline transformer encoder-decoder model with standard text embeddings and compared it against models using several visual embedding strategies, including vowel-removal preprocessing and trigram-based image rendering. Translated outputs were evaluated with BLEU scores. Results: Although traditional BPE-based models achieve higher BLEU on clean data, visual embedding models are substantially more robust to spelling noise, achieving up to 2.4× higher BLEU scores at 50% character corruption.
%U https://aclanthology.org/2026.abjadnlp-1.9/
%P 66-74
Markdown (Informal)
[Seeing Words Differently: Visual Embeddings for Robust English-Arabic Machine Translation](https://aclanthology.org/2026.abjadnlp-1.9/) (Alshaikh Saleh & Ahmad, AbjadNLP 2026)
ACL
Mahdi Alshaikh Saleh and Irfan Ahmad. 2026. Seeing Words Differently: Visual Embeddings for Robust English-Arabic Machine Translation. In Proceedings of the 2nd Workshop on NLP for Languages Using Arabic Script, pages 66–74, Rabat, Morocco. Association for Computational Linguistics.
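The abstract evaluates robustness by measuring BLEU under increasing character corruption (up to 50%). Below is a minimal, hypothetical Python sketch of that kind of noise injection. The function name and the substitution-only noise model are assumptions for illustration; the paper's exact corruption procedure is not specified in this record.

```python
import random
import string


def corrupt_characters(text: str, rate: float, seed=None) -> str:
    """Randomly replace a fraction `rate` of alphabetic characters.

    Illustrative sketch of the spelling-noise setting described in the
    abstract. Assumption: substitution-only noise over letters; the
    paper may also use insertions, deletions, or swaps.
    """
    rng = random.Random(seed)
    chars = list(text)
    # Only letters are eligible for corruption; punctuation and spaces stay.
    positions = [i for i, c in enumerate(chars) if c.isalpha()]
    rng.shuffle(positions)
    n_corrupt = int(len(positions) * rate)
    for i in positions[:n_corrupt]:
        # Substitute a random *different* lowercase letter.
        choices = [c for c in string.ascii_lowercase if c != chars[i].lower()]
        chars[i] = rng.choice(choices)
    return "".join(chars)


if __name__ == "__main__":
    sentence = "visual embeddings are robust to spelling noise"
    # 50% corruption, matching the harshest condition in the abstract.
    print(corrupt_characters(sentence, rate=0.5, seed=0))
```

One could apply such a function to the source side of a held-out test set at several corruption rates and re-score each model's output with BLEU to reproduce the robustness comparison the abstract reports.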