@inproceedings{collins-etal-2024-collecting,
title = "Collecting High-quality Multi-modal Conversational Search Data for {E}-Commerce",
author = "Collins, Marcus and
Rokhlenko, Oleg and
Agichtein, Eugene and
Malmasi, Shervin",
editor = "Yu, Wenhao and
Shi, Weijia and
Yasunaga, Michihiro and
Jiang, Meng and
Zhu, Chenguang and
Hajishirzi, Hannaneh and
Zettlemoyer, Luke and
Zhang, Zhihan",
booktitle = "Proceedings of the 3rd Workshop on Knowledge Augmented Methods for NLP",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.knowledgenlp-1.3",
doi = "10.18653/v1/2024.knowledgenlp-1.3",
pages = "30--43",
abstract = "Continued improvement of conversational assistants in knowledge-rich domains like E-Commerce requires large volumes of realistic high-quality conversation data to power increasingly sophisticated large language model chatbots, dialogue managers, response rankers, and recommenders. The problem is exacerbated for multi-modal interactions in realistic conversational product search and recommendation. Here, an artificial sales agent must interact intelligently with a customer using both textual and visual information and incorporate results from external search systems, such as a product catalog. Yet, it remains an open question how to best crowd-source large-scale, naturalistic multi-modal dialogue and action data, required to train such an artificial agent. We describe our crowd-sourced task where one worker (the Buyer) plays the role of the customer, and another (the Seller) plays the role of the sales agent. We identify subtle interactions between one worker{'}s environment and their partner{'}s behavior mediated by workers{'} word choice. We find that limiting information presented to the Buyer, both in their backstory and by the Seller, improves conversation quality. We also show how conversations are improved through minimal automated Seller {``}coaching{''}. While typed and spoken messages are slightly different, the differences are not as large as frequently assumed. We plan to release our platform code and the resulting dialogues to advance research on conversational search agents.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="collins-etal-2024-collecting">
<titleInfo>
<title>Collecting High-quality Multi-modal Conversational Search Data for E-Commerce</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marcus</namePart>
<namePart type="family">Collins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oleg</namePart>
<namePart type="family">Rokhlenko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eugene</namePart>
<namePart type="family">Agichtein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shervin</namePart>
<namePart type="family">Malmasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 3rd Workshop on Knowledge Augmented Methods for NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wenhao</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Weijia</namePart>
<namePart type="family">Shi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michihiro</namePart>
<namePart type="family">Yasunaga</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Meng</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chenguang</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hannaneh</namePart>
<namePart type="family">Hajishirzi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luke</namePart>
<namePart type="family">Zettlemoyer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhihan</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Continued improvement of conversational assistants in knowledge-rich domains like E-Commerce requires large volumes of realistic high-quality conversation data to power increasingly sophisticated large language model chatbots, dialogue managers, response rankers, and recommenders. The problem is exacerbated for multi-modal interactions in realistic conversational product search and recommendation. Here, an artificial sales agent must interact intelligently with a customer using both textual and visual information and incorporate results from external search systems, such as a product catalog. Yet, it remains an open question how to best crowd-source large-scale, naturalistic multi-modal dialogue and action data, required to train such an artificial agent. We describe our crowd-sourced task where one worker (the Buyer) plays the role of the customer, and another (the Seller) plays the role of the sales agent. We identify subtle interactions between one worker’s environment and their partner’s behavior mediated by workers’ word choice. We find that limiting information presented to the Buyer, both in their backstory and by the Seller, improves conversation quality. We also show how conversations are improved through minimal automated Seller “coaching”. While typed and spoken messages are slightly different, the differences are not as large as frequently assumed. We plan to release our platform code and the resulting dialogues to advance research on conversational search agents.</abstract>
<identifier type="citekey">collins-etal-2024-collecting</identifier>
<identifier type="doi">10.18653/v1/2024.knowledgenlp-1.3</identifier>
<location>
<url>https://aclanthology.org/2024.knowledgenlp-1.3</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>30</start>
<end>43</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Collecting High-quality Multi-modal Conversational Search Data for E-Commerce
%A Collins, Marcus
%A Rokhlenko, Oleg
%A Agichtein, Eugene
%A Malmasi, Shervin
%Y Yu, Wenhao
%Y Shi, Weijia
%Y Yasunaga, Michihiro
%Y Jiang, Meng
%Y Zhu, Chenguang
%Y Hajishirzi, Hannaneh
%Y Zettlemoyer, Luke
%Y Zhang, Zhihan
%S Proceedings of the 3rd Workshop on Knowledge Augmented Methods for NLP
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F collins-etal-2024-collecting
%X Continued improvement of conversational assistants in knowledge-rich domains like E-Commerce requires large volumes of realistic high-quality conversation data to power increasingly sophisticated large language model chatbots, dialogue managers, response rankers, and recommenders. The problem is exacerbated for multi-modal interactions in realistic conversational product search and recommendation. Here, an artificial sales agent must interact intelligently with a customer using both textual and visual information and incorporate results from external search systems, such as a product catalog. Yet, it remains an open question how to best crowd-source large-scale, naturalistic multi-modal dialogue and action data, required to train such an artificial agent. We describe our crowd-sourced task where one worker (the Buyer) plays the role of the customer, and another (the Seller) plays the role of the sales agent. We identify subtle interactions between one worker’s environment and their partner’s behavior mediated by workers’ word choice. We find that limiting information presented to the Buyer, both in their backstory and by the Seller, improves conversation quality. We also show how conversations are improved through minimal automated Seller “coaching”. While typed and spoken messages are slightly different, the differences are not as large as frequently assumed. We plan to release our platform code and the resulting dialogues to advance research on conversational search agents.
%R 10.18653/v1/2024.knowledgenlp-1.3
%U https://aclanthology.org/2024.knowledgenlp-1.3
%U https://doi.org/10.18653/v1/2024.knowledgenlp-1.3
%P 30-43
Markdown (Informal)
[Collecting High-quality Multi-modal Conversational Search Data for E-Commerce](https://aclanthology.org/2024.knowledgenlp-1.3) (Collins et al., KnowledgeNLP-WS 2024)
ACL