@article{heck-etal-2022-robust,
title = "Robust Dialogue State Tracking with Weak Supervision and Sparse Data",
author = "Heck, Michael and
Lubis, Nurul and
van Niekerk, Carel and
Feng, Shutong and
Geishauser, Christian and
Lin, Hsien-Chin and
Ga{\v{s}}i{\'c}, Milica",
editor = "Roark, Brian and
Nenkova, Ani",
journal = "Transactions of the Association for Computational Linguistics",
volume = "10",
year = "2022",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2022.tacl-1.68",
doi = "10.1162/tacl_a_00513",
pages = "1175--1192",
abstract = "Generalizing dialogue state tracking (DST) to new data is especially challenging due to the strong reliance on abundant and fine-grained supervision during training. Sample sparsity, distributional shift, and the occurrence of new concepts and topics frequently lead to severe performance degradation during inference. In this paper we propose a training strategy to build extractive DST models without the need for fine-grained manual span labels. Two novel input-level dropout methods mitigate the negative impact of sample sparsity. We propose a new model architecture with a unified encoder that supports value as well as slot independence by leveraging the attention mechanism. We combine the strengths of triple copy strategy DST and value matching to benefit from complementary predictions without violating the principle of ontology independence. Our experiments demonstrate that an extractive DST model can be trained without manual span labels. Our architecture and training strategies improve robustness towards sample sparsity, new concepts, and topics, leading to state-of-the-art performance on a range of benchmarks. We further highlight our model{'}s ability to effectively learn from non-dialogue data.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="heck-etal-2022-robust">
<titleInfo>
<title>Robust Dialogue State Tracking with Weak Supervision and Sparse Data</title>
</titleInfo>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Heck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nurul</namePart>
<namePart type="family">Lubis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carel</namePart>
<namePart type="family">van Niekerk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shutong</namePart>
<namePart type="family">Feng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christian</namePart>
<namePart type="family">Geishauser</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hsien-Chin</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Milica</namePart>
<namePart type="family">Gašić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>Generalizing dialogue state tracking (DST) to new data is especially challenging due to the strong reliance on abundant and fine-grained supervision during training. Sample sparsity, distributional shift, and the occurrence of new concepts and topics frequently lead to severe performance degradation during inference. In this paper we propose a training strategy to build extractive DST models without the need for fine-grained manual span labels. Two novel input-level dropout methods mitigate the negative impact of sample sparsity. We propose a new model architecture with a unified encoder that supports value as well as slot independence by leveraging the attention mechanism. We combine the strengths of triple copy strategy DST and value matching to benefit from complementary predictions without violating the principle of ontology independence. Our experiments demonstrate that an extractive DST model can be trained without manual span labels. Our architecture and training strategies improve robustness towards sample sparsity, new concepts, and topics, leading to state-of-the-art performance on a range of benchmarks. We further highlight our model’s ability to effectively learn from non-dialogue data.</abstract>
<identifier type="citekey">heck-etal-2022-robust</identifier>
<identifier type="doi">10.1162/tacl_a_00513</identifier>
<location>
<url>https://aclanthology.org/2022.tacl-1.68</url>
</location>
<part>
<date>2022</date>
<detail type="volume"><number>10</number></detail>
<extent unit="page">
<start>1175</start>
<end>1192</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Robust Dialogue State Tracking with Weak Supervision and Sparse Data
%A Heck, Michael
%A Lubis, Nurul
%A van Niekerk, Carel
%A Feng, Shutong
%A Geishauser, Christian
%A Lin, Hsien-Chin
%A Gašić, Milica
%J Transactions of the Association for Computational Linguistics
%D 2022
%V 10
%I MIT Press
%C Cambridge, MA
%F heck-etal-2022-robust
%X Generalizing dialogue state tracking (DST) to new data is especially challenging due to the strong reliance on abundant and fine-grained supervision during training. Sample sparsity, distributional shift, and the occurrence of new concepts and topics frequently lead to severe performance degradation during inference. In this paper we propose a training strategy to build extractive DST models without the need for fine-grained manual span labels. Two novel input-level dropout methods mitigate the negative impact of sample sparsity. We propose a new model architecture with a unified encoder that supports value as well as slot independence by leveraging the attention mechanism. We combine the strengths of triple copy strategy DST and value matching to benefit from complementary predictions without violating the principle of ontology independence. Our experiments demonstrate that an extractive DST model can be trained without manual span labels. Our architecture and training strategies improve robustness towards sample sparsity, new concepts, and topics, leading to state-of-the-art performance on a range of benchmarks. We further highlight our model’s ability to effectively learn from non-dialogue data.
%R 10.1162/tacl_a_00513
%U https://aclanthology.org/2022.tacl-1.68
%U https://doi.org/10.1162/tacl_a_00513
%P 1175-1192
Markdown (Informal)
[Robust Dialogue State Tracking with Weak Supervision and Sparse Data](https://aclanthology.org/2022.tacl-1.68) (Heck et al., TACL 2022)
ACL
Michael Heck, Nurul Lubis, Carel van Niekerk, Shutong Feng, Christian Geishauser, Hsien-Chin Lin, and Milica Gašić. 2022. Robust Dialogue State Tracking with Weak Supervision and Sparse Data. Transactions of the Association for Computational Linguistics, 10:1175–1192.