@inproceedings{kumar-etal-2025-bridging-gap,
title = "Bridging the Gap: Efficient Cross-Lingual {NER} in Low-Resource Financial Domain",
author = "Kumar, Sunisth and
ElKholy, Mohammed and
Liu, Davide and
Boulenger, Alexandre",
editor = "Chen, Chung-Chi and
Moreno-Sandoval, Antonio and
Huang, Jimin and
Xie, Qianqian and
Ananiadou, Sophia and
Chen, Hsin-Hsi",
booktitle = "Proceedings of the Joint Workshop of the 9th Financial Technology and Natural Language Processing (FinNLP), the 6th Financial Narrative Processing (FNP), and the 1st Workshop on Large Language Models for Finance and Legal (LLMFinLegal)",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.finnlp-1.5/",
pages = "54--62",
abstract = "We present an innovative and efficient modeling framework for cross-lingual named entity recognition (NER), leveraging the strengths of knowledge distillation and consistency training. Our approach distills knowledge from an XLM-RoBERTa model pre-trained on a high-resource source language (English) to a student model, which then undergoes semi-supervised consistency training with KL divergence loss on a low-resource target language (Arabic). We focus our application on the financial domain, using a small, sourced dataset of financial transactions as seen in SMS messages. Using datasets comprising SMS messages in English and Arabic containing financial transaction information, we aim to transfer NER capabilities from English to Arabic with minimal labeled Arabic samples. The framework generalizes named entity recognition from English to Arabic, achieving F1 scores of 0.74 on the Arabic financial transaction dataset and 0.61 on the WikiANN dataset, surpassing or closely competing with models that have 1.7 and 5.3 times more parameters, respectively, while efficiently training it on a single T4 GPU. Our experiments show that using a small amount of labeled data for low-resource cross-lingual NER applications is a wiser choice than utilizing zero-shot techniques while also using up fewer resources. This framework holds significant potential for developing multilingual applications, particularly in regions where digital interactions span English and low-resource languages."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kumar-etal-2025-bridging-gap">
<titleInfo>
<title>Bridging the Gap: Efficient Cross-Lingual NER in Low-Resource Financial Domain</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sunisth</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammed</namePart>
<namePart type="family">ElKholy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Davide</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexandre</namePart>
<namePart type="family">Boulenger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Joint Workshop of the 9th Financial Technology and Natural Language Processing (FinNLP), the 6th Financial Narrative Processing (FNP), and the 1st Workshop on Large Language Models for Finance and Legal (LLMFinLegal)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chung-Chi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonio</namePart>
<namePart type="family">Moreno-Sandoval</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jimin</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qianqian</namePart>
<namePart type="family">Xie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="family">Ananiadou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hsin-Hsi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present an innovative and efficient modeling framework for cross-lingual named entity recognition (NER), leveraging the strengths of knowledge distillation and consistency training. Our approach distills knowledge from an XLM-RoBERTa model pre-trained on a high-resource source language (English) to a student model, which then undergoes semi-supervised consistency training with KL divergence loss on a low-resource target language (Arabic). We focus our application on the financial domain, using a small, sourced dataset of financial transactions as seen in SMS messages. Using datasets comprising SMS messages in English and Arabic containing financial transaction information, we aim to transfer NER capabilities from English to Arabic with minimal labeled Arabic samples. The framework generalizes named entity recognition from English to Arabic, achieving F1 scores of 0.74 on the Arabic financial transaction dataset and 0.61 on the WikiANN dataset, surpassing or closely competing with models that have 1.7 and 5.3 times more parameters, respectively, while efficiently training it on a single T4 GPU. Our experiments show that using a small amount of labeled data for low-resource cross-lingual NER applications is a wiser choice than utilizing zero-shot techniques while also using up fewer resources. This framework holds significant potential for developing multilingual applications, particularly in regions where digital interactions span English and low-resource languages.</abstract>
<identifier type="citekey">kumar-etal-2025-bridging-gap</identifier>
<location>
<url>https://aclanthology.org/2025.finnlp-1.5/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>54</start>
<end>62</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Bridging the Gap: Efficient Cross-Lingual NER in Low-Resource Financial Domain
%A Kumar, Sunisth
%A ElKholy, Mohammed
%A Liu, Davide
%A Boulenger, Alexandre
%Y Chen, Chung-Chi
%Y Moreno-Sandoval, Antonio
%Y Huang, Jimin
%Y Xie, Qianqian
%Y Ananiadou, Sophia
%Y Chen, Hsin-Hsi
%S Proceedings of the Joint Workshop of the 9th Financial Technology and Natural Language Processing (FinNLP), the 6th Financial Narrative Processing (FNP), and the 1st Workshop on Large Language Models for Finance and Legal (LLMFinLegal)
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F kumar-etal-2025-bridging-gap
%X We present an innovative and efficient modeling framework for cross-lingual named entity recognition (NER), leveraging the strengths of knowledge distillation and consistency training. Our approach distills knowledge from an XLM-RoBERTa model pre-trained on a high-resource source language (English) to a student model, which then undergoes semi-supervised consistency training with KL divergence loss on a low-resource target language (Arabic). We focus our application on the financial domain, using a small, sourced dataset of financial transactions as seen in SMS messages. Using datasets comprising SMS messages in English and Arabic containing financial transaction information, we aim to transfer NER capabilities from English to Arabic with minimal labeled Arabic samples. The framework generalizes named entity recognition from English to Arabic, achieving F1 scores of 0.74 on the Arabic financial transaction dataset and 0.61 on the WikiANN dataset, surpassing or closely competing with models that have 1.7 and 5.3 times more parameters, respectively, while efficiently training it on a single T4 GPU. Our experiments show that using a small amount of labeled data for low-resource cross-lingual NER applications is a wiser choice than utilizing zero-shot techniques while also using up fewer resources. This framework holds significant potential for developing multilingual applications, particularly in regions where digital interactions span English and low-resource languages.
%U https://aclanthology.org/2025.finnlp-1.5/
%P 54-62
Markdown (Informal)
[Bridging the Gap: Efficient Cross-Lingual NER in Low-Resource Financial Domain](https://aclanthology.org/2025.finnlp-1.5/) (Kumar et al., FinNLP 2025)
ACL
- Sunisth Kumar, Mohammed ElKholy, Davide Liu, and Alexandre Boulenger. 2025. Bridging the Gap: Efficient Cross-Lingual NER in Low-Resource Financial Domain. In Proceedings of the Joint Workshop of the 9th Financial Technology and Natural Language Processing (FinNLP), the 6th Financial Narrative Processing (FNP), and the 1st Workshop on Large Language Models for Finance and Legal (LLMFinLegal), pages 54–62, Abu Dhabi, UAE. Association for Computational Linguistics.