@inproceedings{ryu-etal-2026-language,
title = "Language-Grounded Multi-Domain Image Translation via Semantic Difference Guidance",
author = "Ryu, Jongwon and
Park, Joonhyung and
Han, Jaeho and
Kim, Yeong-Seok and
Kim, Hye-Rin and
Yoon, Sunjae and
Kim, Junyeong",
editor = "Demberg, Vera and
Inui, Kentaro and
Marquez, Llu{\'i}s",
booktitle = "Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.eacl-long.294/",
pages = "6276--6288",
ISBN = "979-8-89176-380-7",
abstract = "Multi-domain image-to-image translation requires grounding semantic differences expressed in natural language prompts into corresponding visual transformations, while preserving unrelated structural and semantic content. Existing methods struggle to maintain structural integrity and provide fine-grained, attribute-specific control, especially when multiple domains are involved. We propose LACE (Language-grounded Attribute-Controllable Translation), built on two components: (1) a GLIP-Adapter that fuses global semantics with local structural features to preserve consistency, and (2) a Multi-Domain Control Guidance mechanism that explicitly grounds the semantic delta between source and target prompts into per-attribute translation vectors, aligning linguistic semantics with domain-level visual changes. Together, these modules enable compositional multi-domain control with independent strength modulation for each attribute. Experiments on CelebA(Dialog) and BDD100K demonstrate that LACE achieves high visual fidelity, structural preservation, and interpretable domain-specific control, surpassing prior baselines. This positions LACE as a cross-modal content generation framework bridging language semantics and controllable visual translation. Code will be publicly available."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ryu-etal-2026-language">
<titleInfo>
<title>Language-Grounded Multi-Domain Image Translation via Semantic Difference Guidance</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jongwon</namePart>
<namePart type="family">Ryu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joonhyung</namePart>
<namePart type="family">Park</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jaeho</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yeong-Seok</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hye-Rin</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sunjae</namePart>
<namePart type="family">Yoon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junyeong</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Demberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lluís</namePart>
<namePart type="family">Marquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-380-7</identifier>
</relatedItem>
<abstract>Multi-domain image-to-image translation requires grounding semantic differences expressed in natural language prompts into corresponding visual transformations, while preserving unrelated structural and semantic content. Existing methods struggle to maintain structural integrity and provide fine-grained, attribute-specific control, especially when multiple domains are involved. We propose LACE (Language-grounded Attribute-Controllable Translation), built on two components: (1) a GLIP-Adapter that fuses global semantics with local structural features to preserve consistency, and (2) a Multi-Domain Control Guidance mechanism that explicitly grounds the semantic delta between source and target prompts into per-attribute translation vectors, aligning linguistic semantics with domain-level visual changes. Together, these modules enable compositional multi-domain control with independent strength modulation for each attribute. Experiments on CelebA(Dialog) and BDD100K demonstrate that LACE achieves high visual fidelity, structural preservation, and interpretable domain-specific control, surpassing prior baselines. This positions LACE as a cross-modal content generation framework bridging language semantics and controllable visual translation. Code will be publicly available.</abstract>
<identifier type="citekey">ryu-etal-2026-language</identifier>
<location>
<url>https://aclanthology.org/2026.eacl-long.294/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>6276</start>
<end>6288</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Language-Grounded Multi-Domain Image Translation via Semantic Difference Guidance
%A Ryu, Jongwon
%A Park, Joonhyung
%A Han, Jaeho
%A Kim, Yeong-Seok
%A Kim, Hye-Rin
%A Yoon, Sunjae
%A Kim, Junyeong
%Y Demberg, Vera
%Y Inui, Kentaro
%Y Marquez, Lluís
%S Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-380-7
%F ryu-etal-2026-language
%X Multi-domain image-to-image translation requires grounding semantic differences expressed in natural language prompts into corresponding visual transformations, while preserving unrelated structural and semantic content. Existing methods struggle to maintain structural integrity and provide fine-grained, attribute-specific control, especially when multiple domains are involved. We propose LACE (Language-grounded Attribute-Controllable Translation), built on two components: (1) a GLIP-Adapter that fuses global semantics with local structural features to preserve consistency, and (2) a Multi-Domain Control Guidance mechanism that explicitly grounds the semantic delta between source and target prompts into per-attribute translation vectors, aligning linguistic semantics with domain-level visual changes. Together, these modules enable compositional multi-domain control with independent strength modulation for each attribute. Experiments on CelebA(Dialog) and BDD100K demonstrate that LACE achieves high visual fidelity, structural preservation, and interpretable domain-specific control, surpassing prior baselines. This positions LACE as a cross-modal content generation framework bridging language semantics and controllable visual translation. Code will be publicly available.
%U https://aclanthology.org/2026.eacl-long.294/
%P 6276-6288
Markdown (Informal)
[Language-Grounded Multi-Domain Image Translation via Semantic Difference Guidance](https://aclanthology.org/2026.eacl-long.294/) (Ryu et al., EACL 2026)
ACL