@inproceedings{rohan-etal-2023-bencoref,
title = "{B}en{C}oref: A Multi-Domain Dataset of Nominal Phrases and Pronominal Reference Annotations",
author = "Rohan, Shadman and
Hossain, Mojammel and
Rashid, Mohammad and
Mohammed, Nabeel",
editor = "Prange, Jakob and
Friedrich, Annemarie",
booktitle = "Proceedings of the 17th Linguistic Annotation Workshop (LAW-XVII)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.law-1.11",
doi = "10.18653/v1/2023.law-1.11",
pages = "104--117",
abstract = "Coreference Resolution is a well studied problem in NLP. While widely studied for English and other resource-rich languages, research on coreference resolution in Bengali largely remains unexplored due to the absence of relevant datasets. Bengali, being a low-resource language, exhibits greater morphological richness compared to English. In this article, we introduce a new dataset, BenCoref, comprising coreference annotations for Bengali texts gathered from four distinct domains. This relatively small dataset contains 5200 mention annotations forming 502 mention clusters within 48,569 tokens. We describe the process of creating this dataset and report performance of multiple models trained using BenCoref. We anticipate that our work sheds some light on the variations in coreference phenomena across multiple domains in Bengali and encourages the development of additional resources for Bengali. Furthermore, we found poor crosslingual performance at zero-shot setting from English, highlighting the need for more language-specific resources for this task.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rohan-etal-2023-bencoref">
<titleInfo>
<title>BenCoref: A Multi-Domain Dataset of Nominal Phrases and Pronominal Reference Annotations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shadman</namePart>
<namePart type="family">Rohan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mojammel</namePart>
<namePart type="family">Hossain</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="family">Rashid</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nabeel</namePart>
<namePart type="family">Mohammed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th Linguistic Annotation Workshop (LAW-XVII)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jakob</namePart>
<namePart type="family">Prange</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Annemarie</namePart>
<namePart type="family">Friedrich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Coreference Resolution is a well studied problem in NLP. While widely studied for English and other resource-rich languages, research on coreference resolution in Bengali largely remains unexplored due to the absence of relevant datasets. Bengali, being a low-resource language, exhibits greater morphological richness compared to English. In this article, we introduce a new dataset, BenCoref, comprising coreference annotations for Bengali texts gathered from four distinct domains. This relatively small dataset contains 5200 mention annotations forming 502 mention clusters within 48,569 tokens. We describe the process of creating this dataset and report performance of multiple models trained using BenCoref. We anticipate that our work sheds some light on the variations in coreference phenomena across multiple domains in Bengali and encourages the development of additional resources for Bengali. Furthermore, we found poor crosslingual performance at zero-shot setting from English, highlighting the need for more language-specific resources for this task.</abstract>
<identifier type="citekey">rohan-etal-2023-bencoref</identifier>
<identifier type="doi">10.18653/v1/2023.law-1.11</identifier>
<location>
<url>https://aclanthology.org/2023.law-1.11</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>104</start>
<end>117</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BenCoref: A Multi-Domain Dataset of Nominal Phrases and Pronominal Reference Annotations
%A Rohan, Shadman
%A Hossain, Mojammel
%A Rashid, Mohammad
%A Mohammed, Nabeel
%Y Prange, Jakob
%Y Friedrich, Annemarie
%S Proceedings of the 17th Linguistic Annotation Workshop (LAW-XVII)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F rohan-etal-2023-bencoref
%X Coreference Resolution is a well studied problem in NLP. While widely studied for English and other resource-rich languages, research on coreference resolution in Bengali largely remains unexplored due to the absence of relevant datasets. Bengali, being a low-resource language, exhibits greater morphological richness compared to English. In this article, we introduce a new dataset, BenCoref, comprising coreference annotations for Bengali texts gathered from four distinct domains. This relatively small dataset contains 5200 mention annotations forming 502 mention clusters within 48,569 tokens. We describe the process of creating this dataset and report performance of multiple models trained using BenCoref. We anticipate that our work sheds some light on the variations in coreference phenomena across multiple domains in Bengali and encourages the development of additional resources for Bengali. Furthermore, we found poor crosslingual performance at zero-shot setting from English, highlighting the need for more language-specific resources for this task.
%R 10.18653/v1/2023.law-1.11
%U https://aclanthology.org/2023.law-1.11
%U https://doi.org/10.18653/v1/2023.law-1.11
%P 104-117
Markdown (Informal)
[BenCoref: A Multi-Domain Dataset of Nominal Phrases and Pronominal Reference Annotations](https://aclanthology.org/2023.law-1.11) (Rohan et al., LAW 2023)
ACL