@inproceedings{stanojevic-2022-unbiased,
title = "Unbiased and Efficient Sampling of Dependency Trees",
author = "Stanojevi{\'c}, Milo{\v{s}}",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.emnlp-main.110",
doi = "10.18653/v1/2022.emnlp-main.110",
pages = "1691--1706",
abstract = "Most computational models of dependency syntax consist of distributions over spanning trees. However, the majority of dependency treebanks require that every valid dependency tree has a single edge coming out of the ROOT node, a constraint that is not part of the definition of spanning trees. For this reason all standard inference algorithms for spanning trees are sub-optimal for inference over dependency trees.Zmigrod et al (2021) proposed algorithms for sampling with and without replacement from the dependency tree distribution that incorporate the single-root constraint. In this paper we show that their fastest algorithm for sampling with replacement, Wilson-RC, is in fact producing biased samples and we provide two alternatives that are unbiased. Additionally, we propose two algorithms (one incremental, one parallel) that reduce the asymptotic runtime of algorithm for sampling k trees without replacement to O(kn{\textasciicircum}3). These algorithms are both asymptotically and practically more efficient.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="stanojevic-2022-unbiased">
<titleInfo>
<title>Unbiased and Efficient Sampling of Dependency Trees</title>
</titleInfo>
<name type="personal">
<namePart type="given">Miloš</namePart>
<namePart type="family">Stanojević</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Most computational models of dependency syntax consist of distributions over spanning trees. However, the majority of dependency treebanks require that every valid dependency tree has a single edge coming out of the ROOT node, a constraint that is not part of the definition of spanning trees. For this reason all standard inference algorithms for spanning trees are sub-optimal for inference over dependency trees.Zmigrod et al (2021) proposed algorithms for sampling with and without replacement from the dependency tree distribution that incorporate the single-root constraint. In this paper we show that their fastest algorithm for sampling with replacement, Wilson-RC, is in fact producing biased samples and we provide two alternatives that are unbiased. Additionally, we propose two algorithms (one incremental, one parallel) that reduce the asymptotic runtime of algorithm for sampling k trees without replacement to O(kn⌃3). These algorithms are both asymptotically and practically more efficient.</abstract>
<identifier type="citekey">stanojevic-2022-unbiased</identifier>
<identifier type="doi">10.18653/v1/2022.emnlp-main.110</identifier>
<location>
<url>https://aclanthology.org/2022.emnlp-main.110</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>1691</start>
<end>1706</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Unbiased and Efficient Sampling of Dependency Trees
%A Stanojević, Miloš
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F stanojevic-2022-unbiased
%X Most computational models of dependency syntax consist of distributions over spanning trees. However, the majority of dependency treebanks require that every valid dependency tree has a single edge coming out of the ROOT node, a constraint that is not part of the definition of spanning trees. For this reason all standard inference algorithms for spanning trees are sub-optimal for inference over dependency trees.Zmigrod et al (2021) proposed algorithms for sampling with and without replacement from the dependency tree distribution that incorporate the single-root constraint. In this paper we show that their fastest algorithm for sampling with replacement, Wilson-RC, is in fact producing biased samples and we provide two alternatives that are unbiased. Additionally, we propose two algorithms (one incremental, one parallel) that reduce the asymptotic runtime of algorithm for sampling k trees without replacement to O(kn⌃3). These algorithms are both asymptotically and practically more efficient.
%R 10.18653/v1/2022.emnlp-main.110
%U https://aclanthology.org/2022.emnlp-main.110
%U https://doi.org/10.18653/v1/2022.emnlp-main.110
%P 1691-1706
Markdown (Informal)
[Unbiased and Efficient Sampling of Dependency Trees](https://aclanthology.org/2022.emnlp-main.110) (Stanojević, EMNLP 2022)
ACL
- Miloš Stanojević. 2022. Unbiased and Efficient Sampling of Dependency Trees. In Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, pages 1691–1706, Abu Dhabi, United Arab Emirates. Association for Computational Linguistics.