% ACL Anthology record for the NLP4HR 2024 workshop paper introducing the HR-MultiWOZ dataset.
% Case-sensitive words are protected with whole-word braces so sentence-casing styles keep them intact.
@inproceedings{xu-etal-2024-hr,
  title     = {{HR-MultiWOZ}: A Task Oriented Dialogue ({TOD}) Dataset for {HR} {LLM} Agent},
  author    = {Xu, Weijie and
               Huang, Zicheng and
               Hu, Wenxiang and
               Fang, Xi and
               Cherukuri, Rajesh and
               Nayyar, Naumaan and
               Malandri, Lorenzo and
               Sengamedu, Srinivasan},
  editor    = {Hruschka, Estevam and
               Lake, Thom and
               Otani, Naoki and
               Mitchell, Tom},
  booktitle = {Proceedings of the First Workshop on Natural Language Processing for Human Resources ({NLP4HR} 2024)},
  month     = mar,
  year      = {2024},
  address   = {St. Julian's, Malta},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.nlp4hr-1.5},
  pages     = {59--72},
  abstract  = {Recent advancements in Large Language Models (LLMs) have been reshaping Natural Language Processing (NLP) task in several domains. Their use in the field of Human Resources (HR) has still room for expansions and could be beneficial for several time consuming tasks. Examples such as time-off submissions, medical claims filing, and access requests are noteworthy, but they are by no means the sole instances. However the aforementioned developments must grapple with the pivotal challenge of constructing a high-quality training dataset. On one hand, most conversation datasets are solving problems for customers not employees. On the other hand, gathering conversations with HR could raise privacy concerns. To solve it, we introduce HR-Multiwoz, a fully-labeled dataset of 550 conversations spanning 10 HR domains. Our work has the following contributions:(1) It is the first labeled open-sourced conversation dataset in the HR domain for NLP research. (2) It provides a detailed recipe for the data generation procedure along with data analysis and human evaluations. The data generation pipeline is transferrable and can be easily adapted for labeled conversation data generation in other domains. (3) The proposed data-collection pipeline is mostly based on LLMs with minimal human involvement for annotation, which is time and cost-efficient.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding one bibliographic record (ID xu-etal-2024-hr). -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="xu-etal-2024-hr">
    <titleInfo>
      <title>HR-MultiWOZ: A Task Oriented Dialogue (TOD) Dataset for HR LLM Agent</title>
    </titleInfo>
    <!-- Authors of the paper, in byline order. -->
    <name type="personal">
      <namePart type="given">Weijie</namePart>
      <namePart type="family">Xu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zicheng</namePart>
      <namePart type="family">Huang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Wenxiang</namePart>
      <namePart type="family">Hu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xi</namePart>
      <namePart type="family">Fang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Rajesh</namePart>
      <namePart type="family">Cherukuri</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Naumaan</namePart>
      <namePart type="family">Nayyar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Lorenzo</namePart>
      <namePart type="family">Malandri</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Srinivasan</namePart>
      <namePart type="family">Sengamedu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the First Workshop on Natural Language Processing for Human Resources (NLP4HR 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Estevam</namePart>
        <namePart type="family">Hruschka</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Thom</namePart>
        <namePart type="family">Lake</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Naoki</namePart>
        <namePart type="family">Otani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Tom</namePart>
        <namePart type="family">Mitchell</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">St. Julian’s, Malta</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Recent advancements in Large Language Models (LLMs) have been reshaping Natural Language Processing (NLP) task in several domains. Their use in the field of Human Resources (HR) has still room for expansions and could be beneficial for several time consuming tasks. Examples such as time-off submissions, medical claims filing, and access requests are noteworthy, but they are by no means the sole instances. However the aforementioned developments must grapple with the pivotal challenge of constructing a high-quality training dataset. On one hand, most conversation datasets are solving problems for customers not employees. On the other hand, gathering conversations with HR could raise privacy concerns. To solve it, we introduce HR-Multiwoz, a fully-labeled dataset of 550 conversations spanning 10 HR domains. Our work has the following contributions:(1) It is the first labeled open-sourced conversation dataset in the HR domain for NLP research. (2) It provides a detailed recipe for the data generation procedure along with data analysis and human evaluations. The data generation pipeline is transferrable and can be easily adapted for labeled conversation data generation in other domains. (3) The proposed data-collection pipeline is mostly based on LLMs with minimal human involvement for annotation, which is time and cost-efficient.</abstract>
    <identifier type="citekey">xu-etal-2024-hr</identifier>
    <location>
      <url>https://aclanthology.org/2024.nlp4hr-1.5</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2024-03</date>
      <extent unit="page">
        <start>59</start>
        <end>72</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T HR-MultiWOZ: A Task Oriented Dialogue (TOD) Dataset for HR LLM Agent
%A Xu, Weijie
%A Huang, Zicheng
%A Hu, Wenxiang
%A Fang, Xi
%A Cherukuri, Rajesh
%A Nayyar, Naumaan
%A Malandri, Lorenzo
%A Sengamedu, Srinivasan
%Y Hruschka, Estevam
%Y Lake, Thom
%Y Otani, Naoki
%Y Mitchell, Tom
%S Proceedings of the First Workshop on Natural Language Processing for Human Resources (NLP4HR 2024)
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julian’s, Malta
%F xu-etal-2024-hr
%X Recent advancements in Large Language Models (LLMs) have been reshaping Natural Language Processing (NLP) task in several domains. Their use in the field of Human Resources (HR) has still room for expansions and could be beneficial for several time consuming tasks. Examples such as time-off submissions, medical claims filing, and access requests are noteworthy, but they are by no means the sole instances. However the aforementioned developments must grapple with the pivotal challenge of constructing a high-quality training dataset. On one hand, most conversation datasets are solving problems for customers not employees. On the other hand, gathering conversations with HR could raise privacy concerns. To solve it, we introduce HR-Multiwoz, a fully-labeled dataset of 550 conversations spanning 10 HR domains. Our work has the following contributions:(1) It is the first labeled open-sourced conversation dataset in the HR domain for NLP research. (2) It provides a detailed recipe for the data generation procedure along with data analysis and human evaluations. The data generation pipeline is transferrable and can be easily adapted for labeled conversation data generation in other domains. (3) The proposed data-collection pipeline is mostly based on LLMs with minimal human involvement for annotation, which is time and cost-efficient.
%U https://aclanthology.org/2024.nlp4hr-1.5
%P 59-72
Markdown (Informal)
[HR-MultiWOZ: A Task Oriented Dialogue (TOD) Dataset for HR LLM Agent](https://aclanthology.org/2024.nlp4hr-1.5) (Xu et al., NLP4HR-WS 2024)
ACL
- Weijie Xu, Zicheng Huang, Wenxiang Hu, Xi Fang, Rajesh Cherukuri, Naumaan Nayyar, Lorenzo Malandri, and Srinivasan Sengamedu. 2024. HR-MultiWOZ: A Task Oriented Dialogue (TOD) Dataset for HR LLM Agent. In Proceedings of the First Workshop on Natural Language Processing for Human Resources (NLP4HR 2024), pages 59–72, St. Julian’s, Malta. Association for Computational Linguistics.