@inproceedings{lahiri-etal-2024-tk,
title = "Few-{TK}: A Dataset for Few-shot Scientific Typed Keyphrase Recognition",
author = "Lahiri, Avishek and
Sarkar, Pratyay and
Sen, Medha and
Sanyal, Debarshi Kumar and
Mukherjee, Imon",
editor = "Duh, Kevin and
Gomez, Helena and
Bethard, Steven",
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2024",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-naacl.253/",
doi = "10.18653/v1/2024.findings-naacl.253",
pages = "4011--4025",
abstract = "Scientific texts are distinctive from ordinary texts in quite a few aspects like their vocabulary and discourse structure. Consequently, Information Extraction (IE) tasks for scientific texts come with their own set of challenges. The classical definition of Named Entities restricts the inclusion of all scientific terms under its hood, which is why previous works have used the terms Named Entities and Keyphrases interchangeably. We suggest the rechristening of Named Entities for the scientific domain as Typed Keyphrases (TK), broadening their scope. We advocate for exploring this task in the few-shot domain due to the scarcity of labeled scientific IE data. Currently, no dataset exists for few-shot scientific Typed Keyphrase Recognition. To address this gap, we develop an annotation schema and present Few-TK, a dataset in the AI/ML field that includes scientific Typed Keyphrase annotations on abstracts of 500 research papers. To the best of our knowledge, this is the introductory few-shot Typed Keyphrase recognition dataset and only the second dataset structured specifically for few-shot NER, after Few-NERD. We report the results of several few-shot sequence-labelling models applied to our dataset. The data and code are available at https://github.com/AvishekLahiri/Few{\_}TK.git"
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lahiri-etal-2024-tk">
<titleInfo>
<title>Few-TK: A Dataset for Few-shot Scientific Typed Keyphrase Recognition</title>
</titleInfo>
<name type="personal">
<namePart type="given">Avishek</namePart>
<namePart type="family">Lahiri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pratyay</namePart>
<namePart type="family">Sarkar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Medha</namePart>
<namePart type="family">Sen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debarshi</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Sanyal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Imon</namePart>
<namePart type="family">Mukherjee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: NAACL 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Duh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helena</namePart>
<namePart type="family">Gomez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Scientific texts are distinctive from ordinary texts in quite a few aspects like their vocabulary and discourse structure. Consequently, Information Extraction (IE) tasks for scientific texts come with their own set of challenges. The classical definition of Named Entities restricts the inclusion of all scientific terms under its hood, which is why previous works have used the terms Named Entities and Keyphrases interchangeably. We suggest the rechristening of Named Entities for the scientific domain as Typed Keyphrases (TK), broadening their scope. We advocate for exploring this task in the few-shot domain due to the scarcity of labeled scientific IE data. Currently, no dataset exists for few-shot scientific Typed Keyphrase Recognition. To address this gap, we develop an annotation schema and present Few-TK, a dataset in the AI/ML field that includes scientific Typed Keyphrase annotations on abstracts of 500 research papers. To the best of our knowledge, this is the introductory few-shot Typed Keyphrase recognition dataset and only the second dataset structured specifically for few-shot NER, after Few-NERD. We report the results of several few-shot sequence-labelling models applied to our dataset. The data and code are available at https://github.com/AvishekLahiri/Few_TK.git</abstract>
<identifier type="citekey">lahiri-etal-2024-tk</identifier>
<identifier type="doi">10.18653/v1/2024.findings-naacl.253</identifier>
<location>
<url>https://aclanthology.org/2024.findings-naacl.253/</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>4011</start>
<end>4025</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Few-TK: A Dataset for Few-shot Scientific Typed Keyphrase Recognition
%A Lahiri, Avishek
%A Sarkar, Pratyay
%A Sen, Medha
%A Sanyal, Debarshi Kumar
%A Mukherjee, Imon
%Y Duh, Kevin
%Y Gomez, Helena
%Y Bethard, Steven
%S Findings of the Association for Computational Linguistics: NAACL 2024
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F lahiri-etal-2024-tk
%X Scientific texts are distinctive from ordinary texts in quite a few aspects like their vocabulary and discourse structure. Consequently, Information Extraction (IE) tasks for scientific texts come with their own set of challenges. The classical definition of Named Entities restricts the inclusion of all scientific terms under its hood, which is why previous works have used the terms Named Entities and Keyphrases interchangeably. We suggest the rechristening of Named Entities for the scientific domain as Typed Keyphrases (TK), broadening their scope. We advocate for exploring this task in the few-shot domain due to the scarcity of labeled scientific IE data. Currently, no dataset exists for few-shot scientific Typed Keyphrase Recognition. To address this gap, we develop an annotation schema and present Few-TK, a dataset in the AI/ML field that includes scientific Typed Keyphrase annotations on abstracts of 500 research papers. To the best of our knowledge, this is the introductory few-shot Typed Keyphrase recognition dataset and only the second dataset structured specifically for few-shot NER, after Few-NERD. We report the results of several few-shot sequence-labelling models applied to our dataset. The data and code are available at https://github.com/AvishekLahiri/Few_TK.git
%R 10.18653/v1/2024.findings-naacl.253
%U https://aclanthology.org/2024.findings-naacl.253/
%U https://doi.org/10.18653/v1/2024.findings-naacl.253
%P 4011-4025
Markdown (Informal)
[Few-TK: A Dataset for Few-shot Scientific Typed Keyphrase Recognition](https://aclanthology.org/2024.findings-naacl.253/) (Lahiri et al., Findings 2024)
ACL