@inproceedings{brinner-etal-2022-linking,
title = "Linking a Hypothesis Network From the Domain of Invasion Biology to a Corpus of Scientific Abstracts: The {INAS} Dataset",
author = "Brinner, Marc and
Heger, Tina and
Zarriess, Sina",
editor = "Ghosal, Tirthankar and
Blanco-Cuaresma, Sergi and
Accomazzi, Alberto and
Patton, Robert M. and
Grezes, Felix and
Allen, Thomas",
booktitle = "Proceedings of the first Workshop on Information Extraction from Scientific Publications",
month = nov,
year = "2022",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.wiesp-1.5/",
doi = "10.18653/v1/2022.wiesp-1.5",
pages = "32--42",
abstract = "We investigate the problem of identifying the major hypothesis that is addressed in a scientific paper. To this end, we present a dataset from the domain of invasion biology that organizes a set of 954 papers into a network of fine-grained domain-specific categories of hypotheses. We carry out experiments on classifying abstracts according to these categories and present a pilot study on annotating hypothesis statements within the text. We find that hypothesis statements in our dataset are complex, varied and more or less explicit, and, importantly, spread over the whole abstract. Experiments with BERT-based classifiers show that these models are able to classify complex hypothesis statements to some extent, without being trained on sentence-level text span annotations."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="brinner-etal-2022-linking">
<titleInfo>
<title>Linking a Hypothesis Network From the Domain of Invasion Biology to a Corpus of Scientific Abstracts: The INAS Dataset</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marc</namePart>
<namePart type="family">Brinner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tina</namePart>
<namePart type="family">Heger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sina</namePart>
<namePart type="family">Zarriess</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the first Workshop on Information Extraction from Scientific Publications</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tirthankar</namePart>
<namePart type="family">Ghosal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sergi</namePart>
<namePart type="family">Blanco-Cuaresma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alberto</namePart>
<namePart type="family">Accomazzi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robert</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Patton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Felix</namePart>
<namePart type="family">Grezes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thomas</namePart>
<namePart type="family">Allen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We investigate the problem of identifying the major hypothesis that is addressed in a scientific paper. To this end, we present a dataset from the domain of invasion biology that organizes a set of 954 papers into a network of fine-grained domain-specific categories of hypotheses. We carry out experiments on classifying abstracts according to these categories and present a pilot study on annotating hypothesis statements within the text. We find that hypothesis statements in our dataset are complex, varied and more or less explicit, and, importantly, spread over the whole abstract. Experiments with BERT-based classifiers show that these models are able to classify complex hypothesis statements to some extent, without being trained on sentence-level text span annotations.</abstract>
<identifier type="citekey">brinner-etal-2022-linking</identifier>
<identifier type="doi">10.18653/v1/2022.wiesp-1.5</identifier>
<location>
<url>https://aclanthology.org/2022.wiesp-1.5/</url>
</location>
<part>
<date>2022-11</date>
<extent unit="page">
<start>32</start>
<end>42</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Linking a Hypothesis Network From the Domain of Invasion Biology to a Corpus of Scientific Abstracts: The INAS Dataset
%A Brinner, Marc
%A Heger, Tina
%A Zarriess, Sina
%Y Ghosal, Tirthankar
%Y Blanco-Cuaresma, Sergi
%Y Accomazzi, Alberto
%Y Patton, Robert M.
%Y Grezes, Felix
%Y Allen, Thomas
%S Proceedings of the first Workshop on Information Extraction from Scientific Publications
%D 2022
%8 November
%I Association for Computational Linguistics
%C Online
%F brinner-etal-2022-linking
%X We investigate the problem of identifying the major hypothesis that is addressed in a scientific paper. To this end, we present a dataset from the domain of invasion biology that organizes a set of 954 papers into a network of fine-grained domain-specific categories of hypotheses. We carry out experiments on classifying abstracts according to these categories and present a pilot study on annotating hypothesis statements within the text. We find that hypothesis statements in our dataset are complex, varied and more or less explicit, and, importantly, spread over the whole abstract. Experiments with BERT-based classifiers show that these models are able to classify complex hypothesis statements to some extent, without being trained on sentence-level text span annotations.
%R 10.18653/v1/2022.wiesp-1.5
%U https://aclanthology.org/2022.wiesp-1.5/
%U https://doi.org/10.18653/v1/2022.wiesp-1.5
%P 32-42
Markdown (Informal)
[Linking a Hypothesis Network From the Domain of Invasion Biology to a Corpus of Scientific Abstracts: The INAS Dataset](https://aclanthology.org/2022.wiesp-1.5/) (Brinner et al., WIESP 2022)
ACL