% Conference-paper record (Findings of ACL 2026). The same citation is repeated
% after this entry in other export formats (MODS XML, EndNote, plain text).
% Case-sensitive words are brace-protected as whole words so that styles which
% apply sentence casing keep their capitalisation.
@inproceedings{wang-etal-2026-glossagen,
  title     = {{GlossaGen}: Making Academic Translation Smarter with Glossing},
  author    = {Wang, Zixiao and
               Zhang, Duzhen and
               Zhang, Juntian and
               Liu, Yuhan and
               Li, Guoming and
               Wu, Haolun and
               Song, Le and
               Chen, Xiuying},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.findings-acl.952/},
  pages     = {19077--19095},
  isbn      = {979-8-89176-395-1},
  abstract  = {When reading foreign-language literature, non-native users often face significant challenges. Existing traditional machine translation systems tend to obscure or mistranslate key terminology, while paraphrasing aimed at lay readers often oversimplifies it, thereby hindering their ability to master domain-specific technical vocabulary. To bridge this gap, we first define a novel task, Glossing-Oriented Academic Translation (GOAT), which aims to produce translations dynamically adapted to a reader's academic proficiency, or level. We then propose GlossaGen, a comprehensive framework to address this task. GlossaGen features two key innovations: a multi-agent data synthesis pipeline that leverages academic personas to automatically generate a large-scale, structured dataset with level-specific explanations; and a novel training strategy based on dynamic adapter merging, which balances task generalization with user-level specialization by combining a ``generalist'' adapter with a fine-grained ``expert'' one. We evaluate GlossaGen on our synthesized benchmark, where results from automatic metrics, large language model (LLM)-based assessments, and human evaluations consistently demonstrate that our approach achieves higher scores than strong baselines across most metrics. Our framework provides a scalable pathway to enhance the comprehensibility of scientific literature for non-native readers, delivering more accurate translations accompanied by pedagogically sound, level-specific term explanations, and we release our code and data to facilitate further research.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, ID wang-etal-2026-glossagen. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wang-etal-2026-glossagen">
<!-- Title of the paper itself (the proceedings title is inside the host relatedItem). -->
<titleInfo>
<title>GlossaGen: Making Academic Translation Smarter with Glossing</title>
</titleInfo>
<!-- Authors: one <name> element per person, each carrying the marcrelator role "author". -->
<name type="personal">
<namePart type="given">Zixiao</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Duzhen</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juntian</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuhan</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guoming</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haolun</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Le</namePart>
<namePart type="family">Song</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiuying</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<!-- Issue date of the paper, given as year and month. -->
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Host item: the proceedings volume, with its editors, publisher, place, genre and ISBN. -->
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<!-- NOTE(review): the middle initial is stored as "P" with no trailing period; confirm this is intended. -->
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-395-1</identifier>
</relatedItem>
<abstract>When reading foreign-language literature, non-native users often face significant challenges. Existing traditional machine translation systems tend to obscure or mistranslate key terminology, while paraphrasing aimed at lay readers often oversimplifies it, thereby hindering their ability to master domain-specific technical vocabulary. To bridge this gap, we first define a novel task, Glossing-Oriented Academic Translation (GOAT), which aims to produce translations dynamically adapted to a reader’s academic proficiency, or level. We then propose GlossaGen, a comprehensive framework to address this task. GlossaGen features two key innovations: a multi-agent data synthesis pipeline that leverages academic personas to automatically generate a large-scale, structured dataset with level-specific explanations; and a novel training strategy based on dynamic adapter merging, which balances task generalization with user-level specialization by combining a “generalist” adapter with a fine-grained “expert” one. We evaluate GlossaGen on our synthesized benchmark, where results from automatic metrics, large language model (LLM)-based assessments, and human evaluations consistently demonstrate that our approach achieves higher scores than strong baselines across most metrics. Our framework provides a scalable pathway to enhance the comprehensibility of scientific literature for non-native readers, delivering more accurate translations accompanied by pedagogically sound, level-specific term explanations, and we release our code and data to facilitate further research.</abstract>
<!-- Citation key; same value as the ID attribute on the enclosing <mods> element. -->
<identifier type="citekey">wang-etal-2026-glossagen</identifier>
<location>
<url>https://aclanthology.org/2026.findings-acl.952/</url>
</location>
<!-- Position of the paper within the host volume: date plus first and last page. -->
<part>
<date>2026-07</date>
<extent unit="page">
<start>19077</start>
<end>19095</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T GlossaGen: Making Academic Translation Smarter with Glossing
%A Wang, Zixiao
%A Zhang, Duzhen
%A Zhang, Juntian
%A Liu, Yuhan
%A Li, Guoming
%A Wu, Haolun
%A Song, Le
%A Chen, Xiuying
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F wang-etal-2026-glossagen
%X When reading foreign-language literature, non-native users often face significant challenges. Existing traditional machine translation systems tend to obscure or mistranslate key terminology, while paraphrasing aimed at lay readers often oversimplifies it, thereby hindering their ability to master domain-specific technical vocabulary. To bridge this gap, we first define a novel task, Glossing-Oriented Academic Translation (GOAT), which aims to produce translations dynamically adapted to a reader’s academic proficiency, or level. We then propose GlossaGen, a comprehensive framework to address this task. GlossaGen features two key innovations: a multi-agent data synthesis pipeline that leverages academic personas to automatically generate a large-scale, structured dataset with level-specific explanations; and a novel training strategy based on dynamic adapter merging, which balances task generalization with user-level specialization by combining a “generalist” adapter with a fine-grained “expert” one. We evaluate GlossaGen on our synthesized benchmark, where results from automatic metrics, large language model (LLM)-based assessments, and human evaluations consistently demonstrate that our approach achieves higher scores than strong baselines across most metrics. Our framework provides a scalable pathway to enhance the comprehensibility of scientific literature for non-native readers, delivering more accurate translations accompanied by pedagogically sound, level-specific term explanations, and we release our code and data to facilitate further research.
%U https://aclanthology.org/2026.findings-acl.952/
%P 19077-19095
Markdown (Informal)
[GlossaGen: Making Academic Translation Smarter with Glossing](https://aclanthology.org/2026.findings-acl.952/) (Wang et al., Findings 2026)
ACL
- Zixiao Wang, Duzhen Zhang, Juntian Zhang, Yuhan Liu, Guoming Li, Haolun Wu, Le Song, and Xiuying Chen. 2026. GlossaGen: Making Academic Translation Smarter with Glossing. In Findings of the Association for Computational Linguistics: ACL 2026, pages 19077–19095, San Diego, California, United States. Association for Computational Linguistics.