@inproceedings{rivera-etal-2019-annotation,
title = "Annotation Process for the Dialog Act Classification of a Taglish {E}-commerce {Q}{\&}{A} Corpus",
author = "Rivera, Jared and
Pensica, Jan Caleb Oliver and
Valenzuela, Jolene and
Secuya, Alfonso and
Cheng, Charibeth",
editor = "Hahn, Udo and
Hoste, V{\'e}ronique and
Zhang, Zhu",
booktitle = "Proceedings of the Second Workshop on Economics and Natural Language Processing",
month = nov,
year = "2019",
address = "Hong Kong",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D19-5108",
doi = "10.18653/v1/D19-5108",
pages = "61--68",
abstract = "With conversational agents or chatbots making up in quantity of replies rather than quality, the need to identify user intent has become a main concern to improve these agents. Dialog act (DA) classification tackles this concern, and while existing studies have already addressed DA classification in general contexts, no training corpora in the context of e-commerce is available to the public. This research addressed the said insufficiency by building a text-based corpus of 7,265 posts from the question and answer section of products on Lazada Philippines. The SWBD-DAMSL tagset for DA classification was modified to 28 tags fitting the categories applicable to e-commerce conversations. The posts were annotated manually by three (3) human annotators and preprocessing techniques decreased the vocabulary size from 6,340 to 1,134. After analysis, the corpus was composed dominantly of single-label posts, with 34{\%} of the corpus having multiple intent tags. The annotated corpus allowed insights toward the structure of posts created with single to multiple intents.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rivera-etal-2019-annotation">
<titleInfo>
<title>Annotation Process for the Dialog Act Classification of a Taglish E-commerce Q&amp;A Corpus</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jared</namePart>
<namePart type="family">Rivera</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="given">Caleb</namePart>
<namePart type="given">Oliver</namePart>
<namePart type="family">Pensica</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jolene</namePart>
<namePart type="family">Valenzuela</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alfonso</namePart>
<namePart type="family">Secuya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Charibeth</namePart>
<namePart type="family">Cheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Economics and Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Udo</namePart>
<namePart type="family">Hahn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Véronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhu</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>With conversational agents or chatbots making up in quantity of replies rather than quality, the need to identify user intent has become a main concern to improve these agents. Dialog act (DA) classification tackles this concern, and while existing studies have already addressed DA classification in general contexts, no training corpora in the context of e-commerce is available to the public. This research addressed the said insufficiency by building a text-based corpus of 7,265 posts from the question and answer section of products on Lazada Philippines. The SWBD-DAMSL tagset for DA classification was modified to 28 tags fitting the categories applicable to e-commerce conversations. The posts were annotated manually by three (3) human annotators and preprocessing techniques decreased the vocabulary size from 6,340 to 1,134. After analysis, the corpus was composed dominantly of single-label posts, with 34% of the corpus having multiple intent tags. The annotated corpus allowed insights toward the structure of posts created with single to multiple intents.</abstract>
<identifier type="citekey">rivera-etal-2019-annotation</identifier>
<identifier type="doi">10.18653/v1/D19-5108</identifier>
<location>
<url>https://aclanthology.org/D19-5108</url>
</location>
<part>
<date>2019-11</date>
<extent unit="page">
<start>61</start>
<end>68</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Annotation Process for the Dialog Act Classification of a Taglish E-commerce Q&A Corpus
%A Rivera, Jared
%A Pensica, Jan Caleb Oliver
%A Valenzuela, Jolene
%A Secuya, Alfonso
%A Cheng, Charibeth
%Y Hahn, Udo
%Y Hoste, Véronique
%Y Zhang, Zhu
%S Proceedings of the Second Workshop on Economics and Natural Language Processing
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong
%F rivera-etal-2019-annotation
%X With conversational agents or chatbots making up in quantity of replies rather than quality, the need to identify user intent has become a main concern to improve these agents. Dialog act (DA) classification tackles this concern, and while existing studies have already addressed DA classification in general contexts, no training corpora in the context of e-commerce is available to the public. This research addressed the said insufficiency by building a text-based corpus of 7,265 posts from the question and answer section of products on Lazada Philippines. The SWBD-DAMSL tagset for DA classification was modified to 28 tags fitting the categories applicable to e-commerce conversations. The posts were annotated manually by three (3) human annotators and preprocessing techniques decreased the vocabulary size from 6,340 to 1,134. After analysis, the corpus was composed dominantly of single-label posts, with 34% of the corpus having multiple intent tags. The annotated corpus allowed insights toward the structure of posts created with single to multiple intents.
%R 10.18653/v1/D19-5108
%U https://aclanthology.org/D19-5108
%U https://doi.org/10.18653/v1/D19-5108
%P 61-68
Markdown (Informal)
[Annotation Process for the Dialog Act Classification of a Taglish E-commerce Q&A Corpus](https://aclanthology.org/D19-5108) (Rivera et al., 2019)
ACL