@inproceedings{berg-kirkpatrick-spokoyny-2020-empirical,
title = "An Empirical Investigation of Contextualized Number Prediction",
author = "Berg-Kirkpatrick, Taylor and
Spokoyny, Daniel",
editor = "Webber, Bonnie and
Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.emnlp-main.385",
doi = "10.18653/v1/2020.emnlp-main.385",
pages = "4754--4764",
abstract = "We conduct a large scale empirical investigation of contextualized number prediction in running text. Specifically, we consider two tasks: (1)masked number prediction{--} predict-ing a missing numerical value within a sentence, and (2)numerical anomaly detection{--}detecting an errorful numeric value within a sentence. We experiment with novel combinations of contextual encoders and output distributions over the real number line. Specifically, we introduce a suite of output distribution parameterizations that incorporate latent variables to add expressivity and better fit the natural distribution of numeric values in running text, and combine them with both recur-rent and transformer-based encoder architectures. We evaluate these models on two numeric datasets in the financial and scientific domain. Our findings show that output distributions that incorporate discrete latent variables and allow for multiple modes outperform simple flow-based counterparts on all datasets, yielding more accurate numerical pre-diction and anomaly detection. We also show that our models effectively utilize textual con-text and benefit from general-purpose unsupervised pretraining.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="berg-kirkpatrick-spokoyny-2020-empirical">
    <titleInfo>
        <title>An Empirical Investigation of Contextualized Number Prediction</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Taylor</namePart>
        <namePart type="family">Berg-Kirkpatrick</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Daniel</namePart>
        <namePart type="family">Spokoyny</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2020-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Bonnie</namePart>
            <namePart type="family">Webber</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Trevor</namePart>
            <namePart type="family">Cohn</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Yulan</namePart>
            <namePart type="family">He</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Yang</namePart>
            <namePart type="family">Liu</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">Online</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We conduct a large-scale empirical investigation of contextualized number prediction in running text. Specifically, we consider two tasks: (1) masked number prediction–predicting a missing numerical value within a sentence, and (2) numerical anomaly detection–detecting an errorful numeric value within a sentence. We experiment with novel combinations of contextual encoders and output distributions over the real number line. Specifically, we introduce a suite of output distribution parameterizations that incorporate latent variables to add expressivity and better fit the natural distribution of numeric values in running text, and combine them with both recurrent and transformer-based encoder architectures. We evaluate these models on two numeric datasets in the financial and scientific domains. Our findings show that output distributions that incorporate discrete latent variables and allow for multiple modes outperform simple flow-based counterparts on all datasets, yielding more accurate numerical prediction and anomaly detection. We also show that our models effectively utilize textual context and benefit from general-purpose unsupervised pretraining.</abstract>
<identifier type="citekey">berg-kirkpatrick-spokoyny-2020-empirical</identifier>
<identifier type="doi">10.18653/v1/2020.emnlp-main.385</identifier>
<location>
<url>https://aclanthology.org/2020.emnlp-main.385</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>4754</start>
<end>4764</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T An Empirical Investigation of Contextualized Number Prediction
%A Berg-Kirkpatrick, Taylor
%A Spokoyny, Daniel
%Y Webber, Bonnie
%Y Cohn, Trevor
%Y He, Yulan
%Y Liu, Yang
%S Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F berg-kirkpatrick-spokoyny-2020-empirical
%X We conduct a large-scale empirical investigation of contextualized number prediction in running text. Specifically, we consider two tasks: (1) masked number prediction–predicting a missing numerical value within a sentence, and (2) numerical anomaly detection–detecting an errorful numeric value within a sentence. We experiment with novel combinations of contextual encoders and output distributions over the real number line. Specifically, we introduce a suite of output distribution parameterizations that incorporate latent variables to add expressivity and better fit the natural distribution of numeric values in running text, and combine them with both recurrent and transformer-based encoder architectures. We evaluate these models on two numeric datasets in the financial and scientific domains. Our findings show that output distributions that incorporate discrete latent variables and allow for multiple modes outperform simple flow-based counterparts on all datasets, yielding more accurate numerical prediction and anomaly detection. We also show that our models effectively utilize textual context and benefit from general-purpose unsupervised pretraining.
%R 10.18653/v1/2020.emnlp-main.385
%U https://aclanthology.org/2020.emnlp-main.385
%U https://doi.org/10.18653/v1/2020.emnlp-main.385
%P 4754-4764
Markdown (Informal)
[An Empirical Investigation of Contextualized Number Prediction](https://aclanthology.org/2020.emnlp-main.385) (Berg-Kirkpatrick & Spokoyny, EMNLP 2020)
ACL
Taylor Berg-Kirkpatrick and Daniel Spokoyny. 2020. An Empirical Investigation of Contextualized Number Prediction. In Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pages 4754–4764, Online. Association for Computational Linguistics.
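
The abstract describes pairing a contextual encoder with an output distribution over the real number line, where a discrete latent variable lets the density place multiple modes. As a rough illustration of that idea only (not the authors' implementation; a minimal sketch assuming PyTorch, with all names here hypothetical), a mixture-density output head might look like:

```python
# Hypothetical sketch of a multimodal output head over numeric values,
# in the spirit of the paper's abstract; not the authors' code.
import torch
import torch.nn as nn
from torch.distributions import Categorical, MixtureSameFamily, Normal

class MixtureNumberHead(nn.Module):
    """Maps an encoder's context vector to a K-component Gaussian mixture
    over log-space numeric values, so the predicted density can be multimodal."""
    def __init__(self, hidden_dim: int, num_components: int = 8):
        super().__init__()
        # One projection predicts mixture logits, means, and log-scales.
        self.proj = nn.Linear(hidden_dim, 3 * num_components)

    def forward(self, h: torch.Tensor) -> MixtureSameFamily:
        logits, means, log_scales = self.proj(h).chunk(3, dim=-1)
        # The Categorical over components acts as the discrete latent variable.
        return MixtureSameFamily(
            Categorical(logits=logits),
            Normal(loc=means, scale=log_scales.exp()),
        )

# Usage: score masked numbers under the predicted distribution.
head = MixtureNumberHead(hidden_dim=768)
h = torch.randn(4, 768)                 # stand-in for contextual encoder outputs
dist = head(h)
values = torch.tensor([4754.0, 12.0, 0.5, 2020.0])
nll = -dist.log_prob(values.log())      # per-example negative log-likelihood
```

Marginalizing the component variable (as `MixtureSameFamily.log_prob` does) yields a multimodal density over values, in contrast to a single unimodal base distribution passed through a flow; this is the distinction the abstract draws when comparing latent-variable parameterizations to simple flow-based counterparts.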