% Conference paper in the SIGdial 2017 proceedings (ACL Anthology id W17-5539).
% All values are brace-delimited; the venue acronym is protected as a whole word.
@inproceedings{manuvinakurike-etal-2017-using,
    title     = {Using Reinforcement Learning to Model Incrementality in a Fast-Paced Dialogue Game},
    author    = {Manuvinakurike, Ramesh and
                 DeVault, David and
                 Georgila, Kallirroi},
    editor    = {Jokinen, Kristiina and
                 Stede, Manfred and
                 DeVault, David and
                 Louis, Annie},
    booktitle = {Proceedings of the 18th Annual {SIGdial} Meeting on Discourse and Dialogue},
    month     = aug,
    year      = {2017},
    address   = {Saarbr{\"u}cken, Germany},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/W17-5539/},
    doi       = {10.18653/v1/W17-5539},
    pages     = {331--341},
    abstract  = {We apply Reinforcement Learning (RL) to the problem of incremental dialogue policy learning in the context of a fast-paced dialogue game. We compare the policy learned by RL with a high-performance baseline policy which has been shown to perform very efficiently (nearly as well as humans) in this dialogue game. The RL policy outperforms the baseline policy in offline simulations (based on real user data). We provide a detailed comparison of the RL policy and the baseline policy, including information about how much effort and time it took to develop each one of them. We also highlight the cases where the RL policy performs better, and show that understanding the RL policy can provide valuable insights which can inform the creation of an even better rule-based policy.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<!-- MODS v3 collection holding a single record. The mods ID below is the same
     string as the citekey identifier further down in the record. -->
<mods ID="manuvinakurike-etal-2017-using">
    <titleInfo>
        <title>Using Reinforcement Learning to Model Incrementality in a Fast-Paced Dialogue Game</title>
    </titleInfo>
    <!-- Personal names with roleTerm "author": the three authors of the paper. -->
    <name type="personal">
        <namePart type="given">Ramesh</namePart>
        <namePart type="family">Manuvinakurike</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">DeVault</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Kallirroi</namePart>
        <namePart type="family">Georgila</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2017-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume containing the paper, with its four
         editors, publisher and place. -->
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the 18th Annual SIGdial Meeting on Discourse and Dialogue</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Kristiina</namePart>
            <namePart type="family">Jokinen</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Manfred</namePart>
            <namePart type="family">Stede</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">David</namePart>
            <namePart type="family">DeVault</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Annie</namePart>
            <namePart type="family">Louis</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">Saarbrücken, Germany</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We apply Reinforcement Learning (RL) to the problem of incremental dialogue policy learning in the context of a fast-paced dialogue game. We compare the policy learned by RL with a high-performance baseline policy which has been shown to perform very efficiently (nearly as well as humans) in this dialogue game. The RL policy outperforms the baseline policy in offline simulations (based on real user data). We provide a detailed comparison of the RL policy and the baseline policy, including information about how much effort and time it took to develop each one of them. We also highlight the cases where the RL policy performs better, and show that understanding the RL policy can provide valuable insights which can inform the creation of an even better rule-based policy.</abstract>
    <!-- Identifiers: the citation key and the bare DOI (no resolver prefix);
         the landing-page URL is given separately under location. -->
    <identifier type="citekey">manuvinakurike-etal-2017-using</identifier>
    <identifier type="doi">10.18653/v1/W17-5539</identifier>
    <location>
        <url>https://aclanthology.org/W17-5539/</url>
    </location>
    <!-- Part details: date and first/last page of the paper. -->
    <part>
        <date>2017-08</date>
        <extent unit="page">
            <start>331</start>
            <end>341</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Using Reinforcement Learning to Model Incrementality in a Fast-Paced Dialogue Game
%A Manuvinakurike, Ramesh
%A DeVault, David
%A Georgila, Kallirroi
%Y Jokinen, Kristiina
%Y Stede, Manfred
%Y DeVault, David
%Y Louis, Annie
%S Proceedings of the 18th Annual SIGdial Meeting on Discourse and Dialogue
%D 2017
%8 August
%I Association for Computational Linguistics
%C Saarbrücken, Germany
%F manuvinakurike-etal-2017-using
%X We apply Reinforcement Learning (RL) to the problem of incremental dialogue policy learning in the context of a fast-paced dialogue game. We compare the policy learned by RL with a high-performance baseline policy which has been shown to perform very efficiently (nearly as well as humans) in this dialogue game. The RL policy outperforms the baseline policy in offline simulations (based on real user data). We provide a detailed comparison of the RL policy and the baseline policy, including information about how much effort and time it took to develop each one of them. We also highlight the cases where the RL policy performs better, and show that understanding the RL policy can provide valuable insights which can inform the creation of an even better rule-based policy.
%R 10.18653/v1/W17-5539
%U https://aclanthology.org/W17-5539/
%U https://doi.org/10.18653/v1/W17-5539
%P 331-341
Markdown (Informal)
[Using Reinforcement Learning to Model Incrementality in a Fast-Paced Dialogue Game](https://aclanthology.org/W17-5539/) (Manuvinakurike et al., SIGDIAL 2017)
ACL