% EMNLP 2020 paper introducing the OPENPI dataset (ACL Anthology ID 2020.emnlp-main.520).
% The acronym in `booktitle` is brace-protected so case-changing styles cannot lowercase it.
@inproceedings{tandon-etal-2020-dataset,
  title     = {A Dataset for Tracking Entities in Open Domain Procedural Text},
  author    = {Tandon, Niket and
               Sakaguchi, Keisuke and
               Dalvi, Bhavana and
               Rajagopal, Dheeraj and
               Clark, Peter and
               Guerquin, Michal and
               Richardson, Kyle and
               Hovy, Eduard},
  editor    = {Webber, Bonnie and
               Cohn, Trevor and
               He, Yulan and
               Liu, Yang},
  booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing ({EMNLP})},
  month     = nov,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2020.emnlp-main.520},
  doi       = {10.18653/v1/2020.emnlp-main.520},
  pages     = {6408--6417},
  abstract  = {We present the first dataset for tracking state changes in procedural text from arbitrary domains by using an unrestricted (open) vocabulary. For example, in a text describing fog removal using potatoes, a car window may transition between being foggy, sticky, opaque, and clear. Previous formulations of this task provide the text and entities involved, and ask how those entities change for just a small, pre-defined set of attributes (e.g., location), limiting their fidelity. Our solution is a new task formulation where given just a procedural text as input, the task is to generate a set of state change tuples (entity, attribute, before-state, after-state) for each step, where the entity, attribute, and state values must be predicted from an open vocabulary. Using crowdsourcing, we create OPENPI, a high-quality (91.5{\%} coverage as judged by humans and completely vetted), and large-scale dataset comprising 29,928 state changes over 4,050 sentences from 810 procedural real-world paragraphs from WikiHow.com. A current state-of-the-art generation model on this task achieves 16.1{\%} F1 based on BLEU metric, leaving enough room for novel model architectures.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for a single paper; the mods ID equals the citekey identifier below. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="tandon-etal-2020-dataset">
    <titleInfo>
      <title>A Dataset for Tracking Entities in Open Domain Procedural Text</title>
    </titleInfo>

    <!-- Authors -->
    <name type="personal">
      <namePart type="given">Niket</namePart>
      <namePart type="family">Tandon</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Keisuke</namePart>
      <namePart type="family">Sakaguchi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Bhavana</namePart>
      <namePart type="family">Dalvi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Dheeraj</namePart>
      <namePart type="family">Rajagopal</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Peter</namePart>
      <namePart type="family">Clark</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Michal</namePart>
      <namePart type="family">Guerquin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kyle</namePart>
      <namePart type="family">Richardson</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Eduard</namePart>
      <namePart type="family">Hovy</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2020-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume with its editors, publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Bonnie</namePart>
        <namePart type="family">Webber</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Trevor</namePart>
        <namePart type="family">Cohn</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yulan</namePart>
        <namePart type="family">He</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yang</namePart>
        <namePart type="family">Liu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <abstract>We present the first dataset for tracking state changes in procedural text from arbitrary domains by using an unrestricted (open) vocabulary. For example, in a text describing fog removal using potatoes, a car window may transition between being foggy, sticky, opaque, and clear. Previous formulations of this task provide the text and entities involved, and ask how those entities change for just a small, pre-defined set of attributes (e.g., location), limiting their fidelity. Our solution is a new task formulation where given just a procedural text as input, the task is to generate a set of state change tuples (entity, attribute, before-state, after-state) for each step, where the entity, attribute, and state values must be predicted from an open vocabulary. Using crowdsourcing, we create OPENPI, a high-quality (91.5% coverage as judged by humans and completely vetted), and large-scale dataset comprising 29,928 state changes over 4,050 sentences from 810 procedural real-world paragraphs from WikiHow.com. A current state-of-the-art generation model on this task achieves 16.1% F1 based on BLEU metric, leaving enough room for novel model architectures.</abstract>

    <!-- Identifiers and locators -->
    <identifier type="citekey">tandon-etal-2020-dataset</identifier>
    <identifier type="doi">10.18653/v1/2020.emnlp-main.520</identifier>
    <location>
      <url>https://aclanthology.org/2020.emnlp-main.520</url>
    </location>

    <!-- Position of the paper within the host volume -->
    <part>
      <date>2020-11</date>
      <extent unit="page">
        <start>6408</start>
        <end>6417</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A Dataset for Tracking Entities in Open Domain Procedural Text
%A Tandon, Niket
%A Sakaguchi, Keisuke
%A Dalvi, Bhavana
%A Rajagopal, Dheeraj
%A Clark, Peter
%A Guerquin, Michal
%A Richardson, Kyle
%A Hovy, Eduard
%Y Webber, Bonnie
%Y Cohn, Trevor
%Y He, Yulan
%Y Liu, Yang
%S Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F tandon-etal-2020-dataset
%X We present the first dataset for tracking state changes in procedural text from arbitrary domains by using an unrestricted (open) vocabulary. For example, in a text describing fog removal using potatoes, a car window may transition between being foggy, sticky, opaque, and clear. Previous formulations of this task provide the text and entities involved, and ask how those entities change for just a small, pre-defined set of attributes (e.g., location), limiting their fidelity. Our solution is a new task formulation where given just a procedural text as input, the task is to generate a set of state change tuples (entity, attribute, before-state, after-state) for each step, where the entity, attribute, and state values must be predicted from an open vocabulary. Using crowdsourcing, we create OPENPI, a high-quality (91.5% coverage as judged by humans and completely vetted), and large-scale dataset comprising 29,928 state changes over 4,050 sentences from 810 procedural real-world paragraphs from WikiHow.com. A current state-of-the-art generation model on this task achieves 16.1% F1 based on BLEU metric, leaving enough room for novel model architectures.
%R 10.18653/v1/2020.emnlp-main.520
%U https://aclanthology.org/2020.emnlp-main.520
%U https://doi.org/10.18653/v1/2020.emnlp-main.520
%P 6408-6417
Markdown (Informal)
[A Dataset for Tracking Entities in Open Domain Procedural Text](https://aclanthology.org/2020.emnlp-main.520) (Tandon et al., EMNLP 2020)
ACL
- Niket Tandon, Keisuke Sakaguchi, Bhavana Dalvi, Dheeraj Rajagopal, Peter Clark, Michal Guerquin, Kyle Richardson, and Eduard Hovy. 2020. A Dataset for Tracking Entities in Open Domain Procedural Text. In Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pages 6408–6417, Online. Association for Computational Linguistics.