@article{anderson-gomez-rodriguez-2022-impact,
title = "The Impact of Edge Displacement {V}aserstein Distance on {UD} Parsing Performance",
author = "Anderson, Mark and
G{\'o}mez-Rodr{\'\i}guez, Carlos",
journal = "Computational Linguistics",
volume = "48",
number = "3",
month = sep,
year = "2022",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2022.cl-3.2",
doi = "10.1162/coli_a_00440",
pages = "517--554",
abstract = "We contribute to the discussion on parsing performance in NLP by introducing a measurement that evaluates the differences between the distributions of edge displacement (the directed distance of edges) seen in training and test data. We hypothesize that this measurement will be related to differences observed in parsing performance across treebanks. We motivate this by building upon previous work and then attempt to falsify this hypothesis by using a number of statistical methods. We establish that there is a statistical correlation between this measurement and parsing performance even when controlling for potential covariants. We then use this to establish a sampling technique that gives us an adversarial and complementary split. This gives an idea of the lower and upper bounds of parsing systems for a given treebank in lieu of freshly sampled data. In a broader sense, the methodology presented here can act as a reference for future correlation-based exploratory work in NLP.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="anderson-gomez-rodriguez-2022-impact">
<titleInfo>
<title>The Impact of Edge Displacement Vaserstein Distance on UD Parsing Performance</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Anderson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carlos</namePart>
<namePart type="family">Gómez-Rodríguez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>We contribute to the discussion on parsing performance in NLP by introducing a measurement that evaluates the differences between the distributions of edge displacement (the directed distance of edges) seen in training and test data. We hypothesize that this measurement will be related to differences observed in parsing performance across treebanks. We motivate this by building upon previous work and then attempt to falsify this hypothesis by using a number of statistical methods. We establish that there is a statistical correlation between this measurement and parsing performance even when controlling for potential covariants. We then use this to establish a sampling technique that gives us an adversarial and complementary split. This gives an idea of the lower and upper bounds of parsing systems for a given treebank in lieu of freshly sampled data. In a broader sense, the methodology presented here can act as a reference for future correlation-based exploratory work in NLP.</abstract>
<identifier type="citekey">anderson-gomez-rodriguez-2022-impact</identifier>
<identifier type="doi">10.1162/coli_a_00440</identifier>
<location>
<url>https://aclanthology.org/2022.cl-3.2</url>
</location>
<part>
<date>2022-09</date>
<detail type="volume"><number>48</number></detail>
<detail type="issue"><number>3</number></detail>
<extent unit="page">
<start>517</start>
<end>554</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T The Impact of Edge Displacement Vaserstein Distance on UD Parsing Performance
%A Anderson, Mark
%A Gómez-Rodríguez, Carlos
%J Computational Linguistics
%D 2022
%8 September
%V 48
%N 3
%I MIT Press
%C Cambridge, MA
%F anderson-gomez-rodriguez-2022-impact
%X We contribute to the discussion on parsing performance in NLP by introducing a measurement that evaluates the differences between the distributions of edge displacement (the directed distance of edges) seen in training and test data. We hypothesize that this measurement will be related to differences observed in parsing performance across treebanks. We motivate this by building upon previous work and then attempt to falsify this hypothesis by using a number of statistical methods. We establish that there is a statistical correlation between this measurement and parsing performance even when controlling for potential covariants. We then use this to establish a sampling technique that gives us an adversarial and complementary split. This gives an idea of the lower and upper bounds of parsing systems for a given treebank in lieu of freshly sampled data. In a broader sense, the methodology presented here can act as a reference for future correlation-based exploratory work in NLP.
%R 10.1162/coli_a_00440
%U https://aclanthology.org/2022.cl-3.2
%U https://doi.org/10.1162/coli_a_00440
%P 517-554
Markdown (Informal)
[The Impact of Edge Displacement Vaserstein Distance on UD Parsing Performance](https://aclanthology.org/2022.cl-3.2) (Anderson & Gómez-Rodríguez, CL 2022)
ACL