@inproceedings{ajayi-etal-2024-using,
title = "Using Information Retrieval Techniques to Automatically Repurpose Existing Dialogue Datasets for Safe Chatbot Development",
author = "Ajayi, Tunde Oluwaseyi and
Negi, Gaurav and
Arcan, Mihael and
Buitelaar, Paul",
editor = "Dinkar, Tanvi and
Attanasio, Giuseppe and
Curry, Amanda Cercas and
Konstas, Ioannis and
Hovy, Dirk and
Rieser, Verena",
booktitle = "Proceedings of Safety4ConvAI: The Third Workshop on Safety for Conversational AI @ LREC-COLING 2024",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.safety4convai-1.3",
pages = "16--27",
abstract = "There has been notable progress in the development of open-domain dialogue systems (chatbots) especially with the rapid advancement of the capabilities of Large Language Models. Chatbots excel at holding conversations in a manner that keeps a user interested and engaged. However, their responses can be unsafe, as they can respond in an offensive manner or offer harmful professional advice. As a way to mitigate this issue, recent work crowdsource datasets with exemplary responses or annotate dialogue safety datasets, which are relatively scarce compared to casual dialogues. Despite the quality of data obtained from crowdsourcing, it can be expensive and time consuming. This work proposes an effective pipeline, using information retrieval, to automatically repurpose existing dialogue datasets for safe chatbot development, as a way to address the aforementioned challenges. We select an existing dialogue dataset, revise its unsafe responses, as a way to obtain a dataset with safer responses to unsafe user inputs. We then fine-tune dialogue models on the original and revised datasets and generate responses to evaluate the safeness of the models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ajayi-etal-2024-using">
<titleInfo>
<title>Using Information Retrieval Techniques to Automatically Repurpose Existing Dialogue Datasets for Safe Chatbot Development</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tunde</namePart>
<namePart type="given">Oluwaseyi</namePart>
<namePart type="family">Ajayi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gaurav</namePart>
<namePart type="family">Negi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mihael</namePart>
<namePart type="family">Arcan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Buitelaar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of Safety4ConvAI: The Third Workshop on Safety for Conversational AI @ LREC-COLING 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tanvi</namePart>
<namePart type="family">Dinkar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giuseppe</namePart>
<namePart type="family">Attanasio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amanda</namePart>
<namePart type="given">Cercas</namePart>
<namePart type="family">Curry</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ioannis</namePart>
<namePart type="family">Konstas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dirk</namePart>
<namePart type="family">Hovy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Verena</namePart>
<namePart type="family">Rieser</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>There has been notable progress in the development of open-domain dialogue systems (chatbots) especially with the rapid advancement of the capabilities of Large Language Models. Chatbots excel at holding conversations in a manner that keeps a user interested and engaged. However, their responses can be unsafe, as they can respond in an offensive manner or offer harmful professional advice. As a way to mitigate this issue, recent work crowdsource datasets with exemplary responses or annotate dialogue safety datasets, which are relatively scarce compared to casual dialogues. Despite the quality of data obtained from crowdsourcing, it can be expensive and time consuming. This work proposes an effective pipeline, using information retrieval, to automatically repurpose existing dialogue datasets for safe chatbot development, as a way to address the aforementioned challenges. We select an existing dialogue dataset, revise its unsafe responses, as a way to obtain a dataset with safer responses to unsafe user inputs. We then fine-tune dialogue models on the original and revised datasets and generate responses to evaluate the safeness of the models.</abstract>
<identifier type="citekey">ajayi-etal-2024-using</identifier>
<location>
<url>https://aclanthology.org/2024.safety4convai-1.3</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>16</start>
<end>27</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Using Information Retrieval Techniques to Automatically Repurpose Existing Dialogue Datasets for Safe Chatbot Development
%A Ajayi, Tunde Oluwaseyi
%A Negi, Gaurav
%A Arcan, Mihael
%A Buitelaar, Paul
%Y Dinkar, Tanvi
%Y Attanasio, Giuseppe
%Y Curry, Amanda Cercas
%Y Konstas, Ioannis
%Y Hovy, Dirk
%Y Rieser, Verena
%S Proceedings of Safety4ConvAI: The Third Workshop on Safety for Conversational AI @ LREC-COLING 2024
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F ajayi-etal-2024-using
%X There has been notable progress in the development of open-domain dialogue systems (chatbots) especially with the rapid advancement of the capabilities of Large Language Models. Chatbots excel at holding conversations in a manner that keeps a user interested and engaged. However, their responses can be unsafe, as they can respond in an offensive manner or offer harmful professional advice. As a way to mitigate this issue, recent work crowdsource datasets with exemplary responses or annotate dialogue safety datasets, which are relatively scarce compared to casual dialogues. Despite the quality of data obtained from crowdsourcing, it can be expensive and time consuming. This work proposes an effective pipeline, using information retrieval, to automatically repurpose existing dialogue datasets for safe chatbot development, as a way to address the aforementioned challenges. We select an existing dialogue dataset, revise its unsafe responses, as a way to obtain a dataset with safer responses to unsafe user inputs. We then fine-tune dialogue models on the original and revised datasets and generate responses to evaluate the safeness of the models.
%U https://aclanthology.org/2024.safety4convai-1.3
%P 16-27
Markdown (Informal)
[Using Information Retrieval Techniques to Automatically Repurpose Existing Dialogue Datasets for Safe Chatbot Development](https://aclanthology.org/2024.safety4convai-1.3) (Ajayi et al., Safety4ConvAI-WS 2024)
ACL