@inproceedings{lu-etal-2023-august,
title = "{AUGUST}: an Automatic Generation Understudy for Synthesizing Conversational Recommendation Datasets",
author = "Lu, Yu and
Bao, Junwei and
Ma, Zichen and
Han, Xiaoguang and
Wu, Youzheng and
Cui, Shuguang and
He, Xiaodong",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-acl.670",
doi = "10.18653/v1/2023.findings-acl.670",
pages = "10538--10549",
abstract = "High-quality data is essential for conversational recommendation systems and serves as the cornerstone of the network architecture development and training strategy design. Existing works contribute heavy human efforts to manually labeling or designing and extending recommender dialogue templates. However, they suffer from: (i) the limited number of human annotators results in datasets can hardly capture rich and large-scale cases in the real world, (ii) the limited experience and knowledge of annotators accounts for the uninformative corpus and inappropriate recommendations. In this paper, we propose a novel automatic dataset synthesis approach that can generate large-scale and high-quality recommendation dialogues through a data2text generation process, where unstructured recommendation conversations are generated from structured graphs based on user-item information from the real world. In doing so, we comprehensively exploit: (i) rich personalized user profiles from traditional recommendation datasets, (ii) rich external knowledge from knowledge graphs, and (iii) the conversation ability contained in human-to-human conversational recommendation datasets. Extensive experiments validate the benefit brought by the automatically synthesized data under low-resource scenarios, and demonstrate the promising potential to facilitate developing a more effective conversational recommendation system.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lu-etal-2023-august">
<titleInfo>
<title>AUGUST: an Automatic Generation Understudy for Synthesizing Conversational Recommendation Datasets</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Lu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junwei</namePart>
<namePart type="family">Bao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zichen</namePart>
<namePart type="family">Ma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaoguang</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Youzheng</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shuguang</namePart>
<namePart type="family">Cui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaodong</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Boyd-Graber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naoaki</namePart>
<namePart type="family">Okazaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>High-quality data is essential for conversational recommendation systems and serves as the cornerstone of the network architecture development and training strategy design. Existing works contribute heavy human efforts to manually labeling or designing and extending recommender dialogue templates. However, they suffer from: (i) the limited number of human annotators results in datasets can hardly capture rich and large-scale cases in the real world, (ii) the limited experience and knowledge of annotators accounts for the uninformative corpus and inappropriate recommendations. In this paper, we propose a novel automatic dataset synthesis approach that can generate large-scale and high-quality recommendation dialogues through a data2text generation process, where unstructured recommendation conversations are generated from structured graphs based on user-item information from the real world. In doing so, we comprehensively exploit: (i) rich personalized user profiles from traditional recommendation datasets, (ii) rich external knowledge from knowledge graphs, and (iii) the conversation ability contained in human-to-human conversational recommendation datasets. Extensive experiments validate the benefit brought by the automatically synthesized data under low-resource scenarios, and demonstrate the promising potential to facilitate developing a more effective conversational recommendation system.</abstract>
<identifier type="citekey">lu-etal-2023-august</identifier>
<identifier type="doi">10.18653/v1/2023.findings-acl.670</identifier>
<location>
<url>https://aclanthology.org/2023.findings-acl.670</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>10538</start>
<end>10549</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T AUGUST: an Automatic Generation Understudy for Synthesizing Conversational Recommendation Datasets
%A Lu, Yu
%A Bao, Junwei
%A Ma, Zichen
%A Han, Xiaoguang
%A Wu, Youzheng
%A Cui, Shuguang
%A He, Xiaodong
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Findings of the Association for Computational Linguistics: ACL 2023
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F lu-etal-2023-august
%X High-quality data is essential for conversational recommendation systems and serves as the cornerstone of the network architecture development and training strategy design. Existing works contribute heavy human efforts to manually labeling or designing and extending recommender dialogue templates. However, they suffer from: (i) the limited number of human annotators results in datasets can hardly capture rich and large-scale cases in the real world, (ii) the limited experience and knowledge of annotators accounts for the uninformative corpus and inappropriate recommendations. In this paper, we propose a novel automatic dataset synthesis approach that can generate large-scale and high-quality recommendation dialogues through a data2text generation process, where unstructured recommendation conversations are generated from structured graphs based on user-item information from the real world. In doing so, we comprehensively exploit: (i) rich personalized user profiles from traditional recommendation datasets, (ii) rich external knowledge from knowledge graphs, and (iii) the conversation ability contained in human-to-human conversational recommendation datasets. Extensive experiments validate the benefit brought by the automatically synthesized data under low-resource scenarios, and demonstrate the promising potential to facilitate developing a more effective conversational recommendation system.
%R 10.18653/v1/2023.findings-acl.670
%U https://aclanthology.org/2023.findings-acl.670
%U https://doi.org/10.18653/v1/2023.findings-acl.670
%P 10538-10549
Markdown (Informal)
[AUGUST: an Automatic Generation Understudy for Synthesizing Conversational Recommendation Datasets](https://aclanthology.org/2023.findings-acl.670) (Lu et al., Findings 2023)
ACL