% BibTeX record for the Findings of ACL 2024 paper cited as ghosh-etal-2024-toward.
% The MODS XML export of the same record starts right after the closing brace.
@inproceedings{ghosh-etal-2024-toward,
    title     = {Toward Reliable Ad-hoc Scientific Information Extraction: A Case Study on Two Materials Dataset},
    author    = {Ghosh, Satanu and
                 Brodnik, Neal and
                 Frey, Carolina and
                 Holgate, Collin and
                 Pollock, Tresa and
                 Daly, Samantha and
                 Carton, Samuel},
    editor    = {Ku, Lun-Wei and
                 Martins, Andre and
                 Srikumar, Vivek},
    booktitle = {Findings of the Association for Computational Linguistics: {ACL} 2024},
    month     = aug,
    year      = {2024},
    address   = {Bangkok, Thailand},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2024.findings-acl.897/},
    doi       = {10.18653/v1/2024.findings-acl.897},
    pages     = {15109--15123},
    abstract  = {We explore the ability of GPT-4 to perform ad-hoc schema-based information extraction from scientific literature. We assess specifically whether it can, with a basic one-shot prompting approach over the full text of the included manuscripts, replicate two existing material science datasets, one pertaining to multi-principal element alloys (MPEAs), and one to silicate diffusion. We collaborate with materials scientists to perform a detailed manual error analysis to assess where and why the model struggles to faithfully extract the desired information, and draw on their insights to suggest research directions to address this broadly important task.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ghosh-etal-2024-toward">
<titleInfo>
<title>Toward Reliable Ad-hoc Scientific Information Extraction: A Case Study on Two Materials Dataset</title>
</titleInfo>
<name type="personal">
<namePart type="given">Satanu</namePart>
<namePart type="family">Ghosh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Neal</namePart>
<namePart type="family">Brodnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolina</namePart>
<namePart type="family">Frey</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Collin</namePart>
<namePart type="family">Holgate</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tresa</namePart>
<namePart type="family">Pollock</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samantha</namePart>
<namePart type="family">Daly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samuel</namePart>
<namePart type="family">Carton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Srikumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We explore the ability of GPT-4 to perform ad-hoc schema-based information extraction from scientific literature. We assess specifically whether it can, with a basic one-shot prompting approach over the full text of the included manuscripts, replicate two existing material science datasets, one pertaining to multi-principal element alloys (MPEAs), and one to silicate diffusion. We collaborate with materials scientists to perform a detailed manual error analysis to assess where and why the model struggles to faithfully extract the desired information, and draw on their insights to suggest research directions to address this broadly important task.</abstract>
<identifier type="citekey">ghosh-etal-2024-toward</identifier>
<identifier type="doi">10.18653/v1/2024.findings-acl.897</identifier>
<location>
<url>https://aclanthology.org/2024.findings-acl.897/</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>15109</start>
<end>15123</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Toward Reliable Ad-hoc Scientific Information Extraction: A Case Study on Two Materials Dataset
%A Ghosh, Satanu
%A Brodnik, Neal
%A Frey, Carolina
%A Holgate, Collin
%A Pollock, Tresa
%A Daly, Samantha
%A Carton, Samuel
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Findings of the Association for Computational Linguistics: ACL 2024
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F ghosh-etal-2024-toward
%X We explore the ability of GPT-4 to perform ad-hoc schema-based information extraction from scientific literature. We assess specifically whether it can, with a basic one-shot prompting approach over the full text of the included manuscripts, replicate two existing material science datasets, one pertaining to multi-principal element alloys (MPEAs), and one to silicate diffusion. We collaborate with materials scientists to perform a detailed manual error analysis to assess where and why the model struggles to faithfully extract the desired information, and draw on their insights to suggest research directions to address this broadly important task.
%R 10.18653/v1/2024.findings-acl.897
%U https://aclanthology.org/2024.findings-acl.897/
%U https://doi.org/10.18653/v1/2024.findings-acl.897
%P 15109-15123
Markdown (Informal)
[Toward Reliable Ad-hoc Scientific Information Extraction: A Case Study on Two Materials Dataset](https://aclanthology.org/2024.findings-acl.897/) (Ghosh et al., Findings 2024)
ACL