@inproceedings{p-etal-2025-bridge,
title = "Bridge the {GAP}: Multi-lingual Models For Ambiguous Pronominal Coreference Resolution in {S}outh {A}sian Languages",
author = "P, Rahothvarman and
Rajeev, Adith John and
Anuranjana, Kaveri and
Mamidi, Radhika",
editor = "Sarveswaran, Kengatharaiyer and
Vaidya, Ashwini and
Krishna Bal, Bal and
Shams, Sana and
Thapa, Surendrabikram",
booktitle = "Proceedings of the First Workshop on Challenges in Processing South Asian Languages (CHiPSAL 2025)",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2025.chipsal-1.10/",
pages = "104--114",
abstract = "Coreference resolution, the process of determining what a referring expression (a pronoun or a noun phrase) refers to in discourse, is a critical aspect of natural language understanding. However, the development of computational models for coreference resolution in low-resource languages, such as the Dravidian (and more broadly all South Asian) languages, still remains a significant challenge due to the scarcity of annotated corpora in these languages. To address this data scarcity, we adopt a pipeline that translates the English GAP dataset into various South Asian languages, creating a multi-lingual coreference dataset mGAP. Our research aims to leverage this dataset and develop two novel models, namely the joint embedding model and the cross attention model for coreference resolution with Dravidian languages in mind. We also demonstrate that cross-attention captures pronoun-candidate relations better leading to improved coreference resolution. We also harness the similarity across South Asian languages via transfer learning in order to use high resource languages to learn coreference for low resource languages."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="p-etal-2025-bridge">
<titleInfo>
<title>Bridge the GAP: Multi-lingual Models For Ambiguous Pronominal Coreference Resolution in South Asian Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rahothvarman</namePart>
<namePart type="family">P</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adith</namePart>
<namePart type="given">John</namePart>
<namePart type="family">Rajeev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kaveri</namePart>
<namePart type="family">Anuranjana</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Radhika</namePart>
<namePart type="family">Mamidi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Challenges in Processing South Asian Languages (CHiPSAL 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kengatharaiyer</namePart>
<namePart type="family">Sarveswaran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ashwini</namePart>
<namePart type="family">Vaidya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bal</namePart>
<namePart type="family">Krishna Bal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sana</namePart>
<namePart type="family">Shams</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Surendrabikram</namePart>
<namePart type="family">Thapa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Coreference resolution, the process of determining what a referring expression (a pronoun or a noun phrase) refers to in discourse, is a critical aspect of natural language understanding. However, the development of computational models for coreference resolution in low-resource languages, such as the Dravidian (and more broadly all South Asian) languages, still remains a significant challenge due to the scarcity of annotated corpora in these languages. To address this data scarcity, we adopt a pipeline that translates the English GAP dataset into various South Asian languages, creating a multi-lingual coreference dataset mGAP. Our research aims to leverage this dataset and develop two novel models, namely the joint embedding model and the cross attention model for coreference resolution with Dravidian languages in mind. We also demonstrate that cross-attention captures pronoun-candidate relations better leading to improved coreference resolution. We also harness the similarity across South Asian languages via transfer learning in order to use high resource languages to learn coreference for low resource languages.</abstract>
<identifier type="citekey">p-etal-2025-bridge</identifier>
<location>
<url>https://aclanthology.org/2025.chipsal-1.10/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>104</start>
<end>114</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Bridge the GAP: Multi-lingual Models For Ambiguous Pronominal Coreference Resolution in South Asian Languages
%A P, Rahothvarman
%A Rajeev, Adith John
%A Anuranjana, Kaveri
%A Mamidi, Radhika
%Y Sarveswaran, Kengatharaiyer
%Y Vaidya, Ashwini
%Y Krishna Bal, Bal
%Y Shams, Sana
%Y Thapa, Surendrabikram
%S Proceedings of the First Workshop on Challenges in Processing South Asian Languages (CHiPSAL 2025)
%D 2025
%8 January
%I International Committee on Computational Linguistics
%C Abu Dhabi, UAE
%F p-etal-2025-bridge
%X Coreference resolution, the process of determining what a referring expression (a pronoun or a noun phrase) refers to in discourse, is a critical aspect of natural language understanding. However, the development of computational models for coreference resolution in low-resource languages, such as the Dravidian (and more broadly all South Asian) languages, still remains a significant challenge due to the scarcity of annotated corpora in these languages. To address this data scarcity, we adopt a pipeline that translates the English GAP dataset into various South Asian languages, creating a multi-lingual coreference dataset mGAP. Our research aims to leverage this dataset and develop two novel models, namely the joint embedding model and the cross attention model for coreference resolution with Dravidian languages in mind. We also demonstrate that cross-attention captures pronoun-candidate relations better leading to improved coreference resolution. We also harness the similarity across South Asian languages via transfer learning in order to use high resource languages to learn coreference for low resource languages.
%U https://aclanthology.org/2025.chipsal-1.10/
%P 104-114
Markdown (Informal)
[Bridge the GAP: Multi-lingual Models For Ambiguous Pronominal Coreference Resolution in South Asian Languages](https://aclanthology.org/2025.chipsal-1.10/) (P et al., CHiPSAL 2025)
ACL