% W-NUT 2022 workshop paper (pages 180--193 of the proceedings); record URL: https://aclanthology.org/2022.wnut-1.20
@inproceedings{vielsted-etal-2022-increasing,
  author    = {Vielsted, Marcus and
               Wallenius, Nikolaj and
               van der Goot, Rob},
  title     = {Increasing Robustness for Cross-domain Dialogue Act Classification on Social Media Data},
  booktitle = {Proceedings of the Eighth Workshop on Noisy User-generated Text ({W-NUT} 2022)},
  month     = oct,
  year      = {2022},
  address   = {Gyeongju, Republic of Korea},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.wnut-1.20},
  pages     = {180--193},
  abstract  = {Automatically detecting the intent of an utterance is important for various downstream natural language processing tasks. This task is also called Dialogue Act Classification (DAC) and was primarily researched on spoken one-to-one conversations. The rise of social media has made this an interesting data source to explore within DAC, although it comes with some difficulties: non-standard form, variety of language types (across and within platforms), and quickly evolving norms. We therefore investigate the robustness of DAC on social media data in this paper. More concretely, we provide a benchmark that includes cross-domain data splits, as well as a variety of improvements on our transformer-based baseline. Our experiments show that lexical normalization is not beneficial in this setup, balancing the labels through resampling is beneficial in some cases, and incorporating context is crucial for this task and leads to the highest performance improvements (7 F1 percentage points in-domain and 20 cross-domain).},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="vielsted-etal-2022-increasing">
<titleInfo>
<title>Increasing Robustness for Cross-domain Dialogue Act Classification on Social Media Data</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marcus</namePart>
<namePart type="family">Vielsted</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikolaj</namePart>
<namePart type="family">Wallenius</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rob</namePart>
<namePart type="family">van der Goot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth Workshop on Noisy User-generated Text (W-NUT 2022)</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gyeongju, Republic of Korea</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Automatically detecting the intent of an utterance is important for various downstream natural language processing tasks. This task is also called Dialogue Act Classification (DAC) and was primarily researched on spoken one-to-one conversations. The rise of social media has made this an interesting data source to explore within DAC, although it comes with some difficulties: non-standard form, variety of language types (across and within platforms), and quickly evolving norms. We therefore investigate the robustness of DAC on social media data in this paper. More concretely, we provide a benchmark that includes cross-domain data splits, as well as a variety of improvements on our transformer-based baseline. Our experiments show that lexical normalization is not beneficial in this setup, balancing the labels through resampling is beneficial in some cases, and incorporating context is crucial for this task and leads to the highest performance improvements (7 F1 percentage points in-domain and 20 cross-domain).</abstract>
<identifier type="citekey">vielsted-etal-2022-increasing</identifier>
<location>
<url>https://aclanthology.org/2022.wnut-1.20</url>
</location>
<part>
<date>2022-10</date>
<extent unit="page">
<start>180</start>
<end>193</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Increasing Robustness for Cross-domain Dialogue Act Classification on Social Media Data
%A Vielsted, Marcus
%A Wallenius, Nikolaj
%A van der Goot, Rob
%S Proceedings of the Eighth Workshop on Noisy User-generated Text (W-NUT 2022)
%D 2022
%8 October
%I Association for Computational Linguistics
%C Gyeongju, Republic of Korea
%F vielsted-etal-2022-increasing
%X Automatically detecting the intent of an utterance is important for various downstream natural language processing tasks. This task is also called Dialogue Act Classification (DAC) and was primarily researched on spoken one-to-one conversations. The rise of social media has made this an interesting data source to explore within DAC, although it comes with some difficulties: non-standard form, variety of language types (across and within platforms), and quickly evolving norms. We therefore investigate the robustness of DAC on social media data in this paper. More concretely, we provide a benchmark that includes cross-domain data splits, as well as a variety of improvements on our transformer-based baseline. Our experiments show that lexical normalization is not beneficial in this setup, balancing the labels through resampling is beneficial in some cases, and incorporating context is crucial for this task and leads to the highest performance improvements (7 F1 percentage points in-domain and 20 cross-domain).
%U https://aclanthology.org/2022.wnut-1.20
%P 180-193
Markdown (Informal)
[Increasing Robustness for Cross-domain Dialogue Act Classification on Social Media Data](https://aclanthology.org/2022.wnut-1.20) (Vielsted et al., WNUT 2022)
ACL