@article{lowe-etal-2017-training,
title = "Training End-to-End Dialogue Systems with the {U}buntu Dialogue Corpus",
author = "Lowe, Ryan and
Pow, Nissan and
Serban, Iulian Vlad and
Charlin, Laurent and
Liu, Chia-Wei and
Pineau, Joelle",
editor = "Stent, Amanda and
Taboada, Maite and
Fern{\'a}ndez, Raquel and
Traum, David and
Poesio, Massimo and
Di Eugenio, Barbara and
Stede, Manfred",
journal = "Dialogue {\&} Discourse",
volume = "8",
month = jan,
year = "2017",
address = "Bielefeld, Germany",
publisher = "University of Bielefeld",
url = "https://aclanthology.org/2017.dnd-8.14/",
doi = "10.5087/dad.2017.102",
pages = "31--65",
abstract = "In this paper, we construct and train end-to-end neural network-based dialogue systems usingan updated version of the recent Ubuntu Dialogue Corpus, a dataset containing almost 1 million multi-turn dialogues, with a total of over 7 million utterances and 100 million words. This dataset is interesting because of its size, long context lengths, and technical nature; thus, it can be used to train large models directly from data with minimal feature engineering, which can be both time consuming and expensive. We provide baselines in two different environments: one where models are trained to maximize the log-likelihood of a generated utterance conditioned on the context of the conversation, and one where models are trained to select the correct next response from a list of candidate responses. These are both evaluated on a recall task that we call Next Utterance Classification (NUC), as well as other generation-specific metrics. Finally, we provide a qualitative error analysis to help determine the most promising directions for future research on the Ubuntu Dialogue Corpus, and for end-to-end dialogue systems in general."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lowe-etal-2017-training">
<titleInfo>
<title>Training End-to-End Dialogue Systems with the Ubuntu Dialogue Corpus</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Lowe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nissan</namePart>
<namePart type="family">Pow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iulian</namePart>
<namePart type="given">Vlad</namePart>
<namePart type="family">Serban</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laurent</namePart>
<namePart type="family">Charlin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chia-Wei</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joelle</namePart>
<namePart type="family">Pineau</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Dialogue &amp; Discourse</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>University of Bielefeld</publisher>
<place>
<placeTerm type="text">Bielefeld, Germany</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>In this paper, we construct and train end-to-end neural network-based dialogue systems using an updated version of the recent Ubuntu Dialogue Corpus, a dataset containing almost 1 million multi-turn dialogues, with a total of over 7 million utterances and 100 million words. This dataset is interesting because of its size, long context lengths, and technical nature; thus, it can be used to train large models directly from data with minimal feature engineering, which can be both time consuming and expensive. We provide baselines in two different environments: one where models are trained to maximize the log-likelihood of a generated utterance conditioned on the context of the conversation, and one where models are trained to select the correct next response from a list of candidate responses. These are both evaluated on a recall task that we call Next Utterance Classification (NUC), as well as other generation-specific metrics. Finally, we provide a qualitative error analysis to help determine the most promising directions for future research on the Ubuntu Dialogue Corpus, and for end-to-end dialogue systems in general.</abstract>
<identifier type="citekey">lowe-etal-2017-training</identifier>
<identifier type="doi">10.5087/dad.2017.102</identifier>
<location>
<url>https://aclanthology.org/2017.dnd-8.14/</url>
</location>
<part>
<date>2017-01</date>
<detail type="volume"><number>8</number></detail>
<extent unit="page">
<start>31</start>
<end>65</end>
</extent>
</part>
</mods>
</modsCollection>

%0 Journal Article
%T Training End-to-End Dialogue Systems with the Ubuntu Dialogue Corpus
%A Lowe, Ryan
%A Pow, Nissan
%A Serban, Iulian Vlad
%A Charlin, Laurent
%A Liu, Chia-Wei
%A Pineau, Joelle
%J Dialogue & Discourse
%D 2017
%8 January
%V 8
%I University of Bielefeld
%C Bielefeld, Germany
%F lowe-etal-2017-training
%X In this paper, we construct and train end-to-end neural network-based dialogue systems using an updated version of the recent Ubuntu Dialogue Corpus, a dataset containing almost 1 million multi-turn dialogues, with a total of over 7 million utterances and 100 million words. This dataset is interesting because of its size, long context lengths, and technical nature; thus, it can be used to train large models directly from data with minimal feature engineering, which can be both time consuming and expensive. We provide baselines in two different environments: one where models are trained to maximize the log-likelihood of a generated utterance conditioned on the context of the conversation, and one where models are trained to select the correct next response from a list of candidate responses. These are both evaluated on a recall task that we call Next Utterance Classification (NUC), as well as other generation-specific metrics. Finally, we provide a qualitative error analysis to help determine the most promising directions for future research on the Ubuntu Dialogue Corpus, and for end-to-end dialogue systems in general.
%R 10.5087/dad.2017.102
%U https://aclanthology.org/2017.dnd-8.14/
%U https://doi.org/10.5087/dad.2017.102
%P 31-65

Markdown (Informal)
[Training End-to-End Dialogue Systems with the Ubuntu Dialogue Corpus](https://aclanthology.org/2017.dnd-8.14/) (Lowe et al., DND 2017)
ACL
Ryan Lowe, Nissan Pow, Iulian Vlad Serban, Laurent Charlin, Chia-Wei Liu, and Joelle Pineau. 2017. Training End-to-End Dialogue Systems with the Ubuntu Dialogue Corpus. Dialogue & Discourse, 8:31–65.
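
The abstract's evaluation setup, Next Utterance Classification (NUC), is a recall task: the model scores a list of candidate responses for a given context and is credited when the ground-truth next utterance ranks among the top k. Below is a minimal, self-contained Python sketch of that Recall@k computation; the function name, the scores, and the 10-candidate example are illustrative assumptions, not code from the paper.

# Minimal sketch of Recall@k for Next Utterance Classification (NUC).
# Given model scores for a list of candidate responses, the prediction
# counts as correct when the ground-truth next utterance ranks in the
# top k. All names and numbers here are illustrative, not the paper's.

def recall_at_k(candidate_scores, true_index, k):
    # Rank candidate indices from highest to lowest model score.
    ranked = sorted(range(len(candidate_scores)),
                    key=lambda i: candidate_scores[i],
                    reverse=True)
    # Hit if the true response is among the k best-scored candidates.
    return 1.0 if true_index in ranked[:k] else 0.0

# Example: 1 true response plus 9 distractors (a common "1 in 10"
# setup for the Ubuntu Dialogue Corpus), checking Recall@1 and Recall@5.
scores = [0.91, 0.12, 0.55, 0.30, 0.88, 0.07, 0.41, 0.64, 0.22, 0.19]
print(recall_at_k(scores, true_index=0, k=1))  # 1.0
print(recall_at_k(scores, true_index=0, k=5))  # 1.0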