% Conference paper record; the url field points at the ACL Anthology page for this paper.
% The subtitle's initial "A" is braced so sentence-casing styles keep it capitalised after the period.
@inproceedings{gonzalez-etal-2026-image,
    title = "In-Image Machine Translation. {A} Preliminary Modular Approach",
    author = "Gonzalez, Sergio Gomez and
      Domingo, Miguel and
      Casacuberta, Francisco",
    editor = "Baez Santamaria, Selene and
      Somayajula, Sai Ashish and
      Yamaguchi, Atsuki",
    booktitle = "Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 4: Student Research Workshop)",
    month = mar,
    year = "2026",
    address = "Rabat, Morocco",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2026.eacl-srw.38/",
    pages = "502--513",
    ISBN = "979-8-89176-383-8",
    abstract = "In-image machine translation is a sub-task of Image-Based Machine Translation that aims to substitute text embedded in images with its translation into another language. In the current work, we define a simple task with a synthetic dataset based on rendering parallel text over a plain background. Furthermore, we experiment with different optical character recognition, machine translation and image synthesis models to include in our ensemble. Then, we present our cascade approach as a pipeline that obtains the transcript of the original image, translates it, and generates a new image (image synthesis) similar to the original one. Finally, we compare the performance of our approach with several current state-of-the-art models, including an end-to-end approach, demonstrating its competitiveness."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record: metadata for the paper, then the host proceedings volume. -->
  <mods ID="gonzalez-etal-2026-image">
    <titleInfo>
      <title>In-Image Machine Translation. A Preliminary Modular Approach</title>
    </titleInfo>
    <!-- Authors of the paper, in listed order. -->
    <name type="personal">
      <namePart type="given">Sergio</namePart>
      <namePart type="given">Gomez</namePart>
      <namePart type="family">Gonzalez</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Miguel</namePart>
      <namePart type="family">Domingo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Francisco</namePart>
      <namePart type="family">Casacuberta</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume containing the paper, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 4: Student Research Workshop)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Selene</namePart>
        <namePart type="family">Baez Santamaria</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sai</namePart>
        <namePart type="given">Ashish</namePart>
        <namePart type="family">Somayajula</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Atsuki</namePart>
        <namePart type="family">Yamaguchi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Rabat, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-383-8</identifier>
    </relatedItem>
    <abstract>In-image machine translation is a sub-task of Image-Based Machine Translation that aims to substitute text embedded in images with its translation into another language. In the current work, we define a simple task with a synthetic dataset based on rendering parallel text over a plain background. Furthermore, we experiment with different optical character recognition, machine translation and image synthesis models to include in our ensemble. Then, we present our cascade approach as a pipeline that obtains the transcript of the original image, translates it, and generates a new image (image synthesis) similar to the original one. Finally, we compare the performance of our approach with several current state-of-the-art models, including an end-to-end approach, demonstrating its competitiveness.</abstract>
    <identifier type="citekey">gonzalez-etal-2026-image</identifier>
    <location>
      <url>https://aclanthology.org/2026.eacl-srw.38/</url>
    </location>
    <!-- Issue date and page range of the paper within the host volume. -->
    <part>
      <date>2026-03</date>
      <extent unit="page">
        <start>502</start>
        <end>513</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T In-Image Machine Translation. A Preliminary Modular Approach
%A Gonzalez, Sergio Gomez
%A Domingo, Miguel
%A Casacuberta, Francisco
%Y Baez Santamaria, Selene
%Y Somayajula, Sai Ashish
%Y Yamaguchi, Atsuki
%S Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 4: Student Research Workshop)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-383-8
%F gonzalez-etal-2026-image
%X In-image machine translation is a sub-task of Image-Based Machine Translation that aims to substitute text embedded in images with its translation into another language. In the current work, we define a simple task with a synthetic dataset based on rendering parallel text over a plain background. Furthermore, we experiment with different optical character recognition, machine translation and image synthesis models to include in our ensemble. Then, we present our cascade approach as a pipeline that obtains the transcript of the original image, translates it, and generates a new image (image synthesis) similar to the original one. Finally, we compare the performance of our approach with several current state-of-the-art models, including an end-to-end approach, demonstrating its competitiveness.
%U https://aclanthology.org/2026.eacl-srw.38/
%P 502-513
Markdown (Informal)
[In-Image Machine Translation. A Preliminary Modular Approach](https://aclanthology.org/2026.eacl-srw.38/) (Gonzalez et al., EACL 2026)
ACL
- Sergio Gomez Gonzalez, Miguel Domingo, and Francisco Casacuberta. 2026. In-Image Machine Translation. A Preliminary Modular Approach. In Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 4: Student Research Workshop), pages 502–513, Rabat, Morocco. Association for Computational Linguistics.