@inproceedings{chaitanya-etal-2023-annotated,
title = "An Annotated Corpus for Realis Event Detection in Short Stories Written in {E}nglish and Low Resource {A}ssamese Language",
author = "Kirti, Chaitanya and
Choudhury, Pankaj and
Anand, Ashish and
Guha, Prithwijit",
editor = "Pawar, Jyoti D. and
Lalitha Devi, Sobha",
booktitle = "Proceedings of the 20th International Conference on Natural Language Processing (ICON)",
month = dec,
year = "2023",
address = "Goa University, Goa, India",
publisher = "NLP Association of India (NLPAI)",
url = "https://aclanthology.org/2023.icon-1.8/",
pages = "72--81",
abstract = "This paper presents an annotated corpus of Assamese and English short stories for event trigger detection. This marks a pioneering endeavor in short stories, contributing to developing resources for this genre, especially in the low-resource Assamese language. In the process, 200 short stories were manually annotated in both Assamese and English. The dataset was evaluated and several models were compared for predicting events that are actually happening, i.e., realis events. However, it is expensive to develop manually annotated language resources, especially when the text requires specialist knowledge to interpret. In this regard, TagIT, an automated event annotation tool, is introduced. TagIT is designed to facilitate our objective of expanding the dataset from 200 to 1,000. The best-performing model was employed in TagIT to automate the event annotation process. Extensive experiments were conducted to evaluate the quality of the expanded dataset. This study further illustrates how the combination of an automatic annotation tool and human-in-the-loop participation significantly reduces the time needed to generate a high-quality dataset."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chaitanya-etal-2023-annotated">
<titleInfo>
<title>An Annotated Corpus for Realis Event Detection in Short Stories Written in English and Low Resource Assamese Language</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chaitanya</namePart>
<namePart type="family">Kirti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pankaj</namePart>
<namePart type="family">Choudhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ashish</namePart>
<namePart type="family">Anand</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Prithwijit</namePart>
<namePart type="family">Guha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th International Conference on Natural Language Processing (ICON)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jyoti D.</namePart>
<namePart type="family">Pawar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sobha</namePart>
<namePart type="family">Lalitha Devi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>NLP Association of India (NLPAI)</publisher>
<place>
<placeTerm type="text">Goa University, Goa, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents an annotated corpus of Assamese and English short stories for event trigger detection. This marks a pioneering endeavor in short stories, contributing to developing resources for this genre, especially in the low-resource Assamese language. In the process, 200 short stories were manually annotated in both Assamese and English. The dataset was evaluated and several models were compared for predicting events that are actually happening, i.e., realis events. However, it is expensive to develop manually annotated language resources, especially when the text requires specialist knowledge to interpret. In this regard, TagIT, an automated event annotation tool, is introduced. TagIT is designed to facilitate our objective of expanding the dataset from 200 to 1,000. The best-performing model was employed in TagIT to automate the event annotation process. Extensive experiments were conducted to evaluate the quality of the expanded dataset. This study further illustrates how the combination of an automatic annotation tool and human-in-the-loop participation significantly reduces the time needed to generate a high-quality dataset.</abstract>
<identifier type="citekey">chaitanya-etal-2023-annotated</identifier>
<location>
<url>https://aclanthology.org/2023.icon-1.8/</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>72</start>
<end>81</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T An Annotated Corpus for Realis Event Detection in Short Stories Written in English and Low Resource Assamese Language
%A Kirti, Chaitanya
%A Choudhury, Pankaj
%A Anand, Ashish
%A Guha, Prithwijit
%Y Pawar, Jyoti D.
%Y Lalitha Devi, Sobha
%S Proceedings of the 20th International Conference on Natural Language Processing (ICON)
%D 2023
%8 December
%I NLP Association of India (NLPAI)
%C Goa University, Goa, India
%F chaitanya-etal-2023-annotated
%X This paper presents an annotated corpus of Assamese and English short stories for event trigger detection. This marks a pioneering endeavor in short stories, contributing to developing resources for this genre, especially in the low-resource Assamese language. In the process, 200 short stories were manually annotated in both Assamese and English. The dataset was evaluated and several models were compared for predicting events that are actually happening, i.e., realis events. However, it is expensive to develop manually annotated language resources, especially when the text requires specialist knowledge to interpret. In this regard, TagIT, an automated event annotation tool, is introduced. TagIT is designed to facilitate our objective of expanding the dataset from 200 to 1,000. The best-performing model was employed in TagIT to automate the event annotation process. Extensive experiments were conducted to evaluate the quality of the expanded dataset. This study further illustrates how the combination of an automatic annotation tool and human-in-the-loop participation significantly reduces the time needed to generate a high-quality dataset.
%U https://aclanthology.org/2023.icon-1.8/
%P 72-81
Markdown (Informal)
[An Annotated Corpus for Realis Event Detection in Short Stories Written in English and Low Resource Assamese Language](https://aclanthology.org/2023.icon-1.8/) (Kirti et al., ICON 2023)
ACL