@inproceedings{prasad-etal-2019-dataset,
title = "Dataset Mention Extraction and Classification",
author = "Prasad, Animesh and
Si, Chenglei and
Kan, Min-Yen",
editor = "Nastase, Vivi and
Roth, Benjamin and
Dietz, Laura and
McCallum, Andrew",
booktitle = "Proceedings of the Workshop on Extracting Structured Knowledge from Scientific Publications",
month = jun,
year = "2019",
address = "Minneapolis, Minnesota",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-2604",
doi = "10.18653/v1/W19-2604",
pages = "31--36",
abstract = "Datasets are integral artifacts of empirical scientific research. However, due to natural language variation, their recognition can be difficult and even when identified, can often be inconsistently referred across and within publications. We report our approach to the Coleridge Initiative{'}s Rich Context Competition, which tasks participants with identifying dataset surface forms (dataset mention extraction) and associating the extracted mention to its referred dataset (dataset classification). In this work, we propose various neural baselines and evaluate these model on one-plus and zero-shot classification scenarios. We further explore various joint learning approaches - exploring the synergy between the tasks - and report the issues with such techniques.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="prasad-etal-2019-dataset">
<titleInfo>
<title>Dataset Mention Extraction and Classification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Animesh</namePart>
<namePart type="family">Prasad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chenglei</namePart>
<namePart type="family">Si</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Extracting Structured Knowledge from Scientific Publications</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vivi</namePart>
<namePart type="family">Nastase</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benjamin</namePart>
<namePart type="family">Roth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laura</namePart>
<namePart type="family">Dietz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="family">McCallum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Minneapolis, Minnesota</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Datasets are integral artifacts of empirical scientific research. However, due to natural language variation, their recognition can be difficult and even when identified, can often be inconsistently referred across and within publications. We report our approach to the Coleridge Initiative’s Rich Context Competition, which tasks participants with identifying dataset surface forms (dataset mention extraction) and associating the extracted mention to its referred dataset (dataset classification). In this work, we propose various neural baselines and evaluate these model on one-plus and zero-shot classification scenarios. We further explore various joint learning approaches - exploring the synergy between the tasks - and report the issues with such techniques.</abstract>
<identifier type="citekey">prasad-etal-2019-dataset</identifier>
<identifier type="doi">10.18653/v1/W19-2604</identifier>
<location>
<url>https://aclanthology.org/W19-2604</url>
</location>
<part>
<date>2019-06</date>
<extent unit="page">
<start>31</start>
<end>36</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Dataset Mention Extraction and Classification
%A Prasad, Animesh
%A Si, Chenglei
%A Kan, Min-Yen
%Y Nastase, Vivi
%Y Roth, Benjamin
%Y Dietz, Laura
%Y McCallum, Andrew
%S Proceedings of the Workshop on Extracting Structured Knowledge from Scientific Publications
%D 2019
%8 June
%I Association for Computational Linguistics
%C Minneapolis, Minnesota
%F prasad-etal-2019-dataset
%X Datasets are integral artifacts of empirical scientific research. However, due to natural language variation, their recognition can be difficult and even when identified, can often be inconsistently referred across and within publications. We report our approach to the Coleridge Initiative’s Rich Context Competition, which tasks participants with identifying dataset surface forms (dataset mention extraction) and associating the extracted mention to its referred dataset (dataset classification). In this work, we propose various neural baselines and evaluate these model on one-plus and zero-shot classification scenarios. We further explore various joint learning approaches - exploring the synergy between the tasks - and report the issues with such techniques.
%R 10.18653/v1/W19-2604
%U https://aclanthology.org/W19-2604
%U https://doi.org/10.18653/v1/W19-2604
%P 31-36
Markdown (Informal)
[Dataset Mention Extraction and Classification](https://aclanthology.org/W19-2604) (Prasad et al., NAACL 2019)
ACL
- Animesh Prasad, Chenglei Si, and Min-Yen Kan. 2019. Dataset Mention Extraction and Classification. In Proceedings of the Workshop on Extracting Structured Knowledge from Scientific Publications, pages 31–36, Minneapolis, Minnesota. Association for Computational Linguistics.