@inproceedings{le-etal-2026-towards,
title = "Towards Fast and Accurate Modeling for Cross-Lingual Label Projection",
author = "Le, Thang and
Nguyen, Huy Huu and
Luu, Anh Tuan and
Solorio, Thamar and
Nguyen, Thien Huu",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.acl-long.1817/",
pages = "39175--39198",
ISBN = "979-8-89176-390-6",
abstract = "Information extraction (IE) systems rely on structured data for training, but such annotated data is highly imbalanced across languages, with low-resource languages receiving little attention. Label projection techniques aim to bridge this gap by transferring structured annotations from high-resource to low-resource languages. However, existing methods are either inaccurate or too slow for large-scale use. This work aims to address this problem by developing a more effective method that remains sufficiently efficient for large-scale projection. In particular, we propose to synthesize alignment sequence pairs and fine-tune an encoder model with span alignment objective, while controlling data influence during training. Experimental results across 50+ languages show that our framework consistently outperforms previous state-of-the-art methods while maintaining fast inference speed. In addition, we introduce EXP - the first benchmark for explicit evaluation of label projection, thereby reducing confounders and non-determinism in method assessment."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="le-etal-2026-towards">
<titleInfo>
<title>Towards Fast and Accurate Modeling for Cross-Lingual Label Projection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Thang</namePart>
<namePart type="family">Le</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Huy</namePart>
<namePart type="given">Huu</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anh</namePart>
<namePart type="given">Tuan</namePart>
<namePart type="family">Luu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thamar</namePart>
<namePart type="family">Solorio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thien</namePart>
<namePart type="given">Huu</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-390-6</identifier>
</relatedItem>
<abstract>Information extraction (IE) systems rely on structured data for training, but such annotated data is highly imbalanced across languages, with low-resource languages receiving little attention. Label projection techniques aim to bridge this gap by transferring structured annotations from high-resource to low-resource languages. However, existing methods are either inaccurate or too slow for large-scale use. This work aims to address this problem by developing a more effective method that remains sufficiently efficient for large-scale projection. In particular, we propose to synthesize alignment sequence pairs and fine-tune an encoder model with span alignment objective, while controlling data influence during training. Experimental results across 50+ languages show that our framework consistently outperforms previous state-of-the-art methods while maintaining fast inference speed. In addition, we introduce EXP - the first benchmark for explicit evaluation of label projection, thereby reducing confounders and non-determinism in method assessment.</abstract>
<identifier type="citekey">le-etal-2026-towards</identifier>
<location>
<url>https://aclanthology.org/2026.acl-long.1817/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>39175</start>
<end>39198</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Fast and Accurate Modeling for Cross-Lingual Label Projection
%A Le, Thang
%A Nguyen, Huy Huu
%A Luu, Anh Tuan
%A Solorio, Thamar
%A Nguyen, Thien Huu
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-390-6
%F le-etal-2026-towards
%X Information extraction (IE) systems rely on structured data for training, but such annotated data is highly imbalanced across languages, with low-resource languages receiving little attention. Label projection techniques aim to bridge this gap by transferring structured annotations from high-resource to low-resource languages. However, existing methods are either inaccurate or too slow for large-scale use. This work aims to address this problem by developing a more effective method that remains sufficiently efficient for large-scale projection. In particular, we propose to synthesize alignment sequence pairs and fine-tune an encoder model with span alignment objective, while controlling data influence during training. Experimental results across 50+ languages show that our framework consistently outperforms previous state-of-the-art methods while maintaining fast inference speed. In addition, we introduce EXP - the first benchmark for explicit evaluation of label projection, thereby reducing confounders and non-determinism in method assessment.
%U https://aclanthology.org/2026.acl-long.1817/
%P 39175-39198
Markdown (Informal)
[Towards Fast and Accurate Modeling for Cross-Lingual Label Projection](https://aclanthology.org/2026.acl-long.1817/) (Le et al., ACL 2026)
ACL
- Thang Le, Huy Huu Nguyen, Anh Tuan Luu, Thamar Solorio, and Thien Huu Nguyen. 2026. Towards Fast and Accurate Modeling for Cross-Lingual Label Projection. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 39175–39198, San Diego, California, United States. Association for Computational Linguistics.