% BibTeX record for the EMNLP 2025 main-conference paper 2025.emnlp-main.1269
% (ACL Anthology). Accented letters use the {\={a}} form at the top brace level
% of each field so classic BibTeX treats each one as a single letter.
@inproceedings{sarkar-etal-2025-mahanama,
  title     = {Mah{\={a}}n{\={a}}ma: A Unique Testbed for Literary Entity Discovery and Linking},
  author    = {Sarkar, Sujoy and
               Sarkar, Gourav and
               Jagadeeshan, Manoj Balaji and
               Sandhan, Jivnesh and
               Krishna, Amrith and
               Goyal, Pawan},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.emnlp-main.1269/},
  doi       = {10.18653/v1/2025.emnlp-main.1269},
  pages     = {24970--24984},
  isbn      = {979-8-89176-332-6},
  abstract  = {High lexical variation, ambiguous references, and long-range dependencies make entity resolution in literary texts particularly challenging. We present Mah{\={a}}n{\={a}}ma, the first large-scale dataset for end-to-end Entity Discovery and Linking (EDL) in Sanskrit, a morphologically rich and under-resourced language. Derived from the Mah{\={a}}bh{\={a}}rata, the world's longest epic, the dataset comprises over 109K named entity mentions mapped to 5.5K unique entities, and is aligned with an English knowledge base to support cross-lingual linking. The complex narrative structure of Mah{\={a}}n{\={a}}ma, coupled with extensive name variation and ambiguity, poses significant challenges to resolution systems. Our evaluation reveals that current coreference and entity linking models struggle when evaluated on the global context of the test set. These results highlight the limitations of current approaches in resolving entities within such complex discourse. Mah{\={a}}n{\={a}}ma thus provides a unique benchmark for advancing entity resolution, especially in literary domains.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sarkar-etal-2025-mahanama">
<titleInfo>
<title>Mahānāma: A Unique Testbed for Literary Entity Discovery and Linking</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sujoy</namePart>
<namePart type="family">Sarkar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gourav</namePart>
<namePart type="family">Sarkar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manoj</namePart>
<namePart type="given">Balaji</namePart>
<namePart type="family">Jagadeeshan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jivnesh</namePart>
<namePart type="family">Sandhan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amrith</namePart>
<namePart type="family">Krishna</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pawan</namePart>
<namePart type="family">Goyal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-332-6</identifier>
</relatedItem>
<abstract>High lexical variation, ambiguous references, and long-range dependencies make entity resolution in literary texts particularly challenging. We present Mahānāma, the first large-scale dataset for end-to-end Entity Discovery and Linking (EDL) in Sanskrit, a morphologically rich and under-resourced language. Derived from the Mahābhārata, the world’s longest epic, the dataset comprises over 109K named entity mentions mapped to 5.5K unique entities, and is aligned with an English knowledge base to support cross-lingual linking. The complex narrative structure of Mahānāma, coupled with extensive name variation and ambiguity, poses significant challenges to resolution systems. Our evaluation reveals that current coreference and entity linking models struggle when evaluated on the global context of the test set. These results highlight the limitations of current approaches in resolving entities within such complex discourse. Mahānāma thus provides a unique benchmark for advancing entity resolution, especially in literary domains.</abstract>
<identifier type="citekey">sarkar-etal-2025-mahanama</identifier>
<identifier type="doi">10.18653/v1/2025.emnlp-main.1269</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-main.1269/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>24970</start>
<end>24984</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Mahānāma: A Unique Testbed for Literary Entity Discovery and Linking
%A Sarkar, Sujoy
%A Sarkar, Gourav
%A Jagadeeshan, Manoj Balaji
%A Sandhan, Jivnesh
%A Krishna, Amrith
%A Goyal, Pawan
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-332-6
%F sarkar-etal-2025-mahanama
%X High lexical variation, ambiguous references, and long-range dependencies make entity resolution in literary texts particularly challenging. We present Mahānāma, the first large-scale dataset for end-to-end Entity Discovery and Linking (EDL) in Sanskrit, a morphologically rich and under-resourced language. Derived from the Mahābhārata, the world’s longest epic, the dataset comprises over 109K named entity mentions mapped to 5.5K unique entities, and is aligned with an English knowledge base to support cross-lingual linking. The complex narrative structure of Mahānāma, coupled with extensive name variation and ambiguity, poses significant challenges to resolution systems. Our evaluation reveals that current coreference and entity linking models struggle when evaluated on the global context of the test set. These results highlight the limitations of current approaches in resolving entities within such complex discourse. Mahānāma thus provides a unique benchmark for advancing entity resolution, especially in literary domains.
%R 10.18653/v1/2025.emnlp-main.1269
%U https://aclanthology.org/2025.emnlp-main.1269/
%U https://doi.org/10.18653/v1/2025.emnlp-main.1269
%P 24970-24984
Markdown (Informal)
[Mahānāma: A Unique Testbed for Literary Entity Discovery and Linking](https://aclanthology.org/2025.emnlp-main.1269/) (Sarkar et al., EMNLP 2025)
ACL
- Sujoy Sarkar, Gourav Sarkar, Manoj Balaji Jagadeeshan, Jivnesh Sandhan, Amrith Krishna, and Pawan Goyal. 2025. Mahānāma: A Unique Testbed for Literary Entity Discovery and Linking. In Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing, pages 24970–24984, Suzhou, China. Association for Computational Linguistics.