@inproceedings{nisioi-etal-2018-content,
title = "Content Extraction and Lexical Analysis from Customer-Agent Interactions",
author = "Nisioi, Sergiu and
Bucur, Anca and
Dinu, Liviu P.",
editor = "Xu, Wei and
Ritter, Alan and
Baldwin, Tim and
Rahimi, Afshin",
booktitle = "Proceedings of the 2018 {EMNLP} Workshop W-{NUT}: The 4th Workshop on Noisy User-generated Text",
month = nov,
year = "2018",
address = "Brussels, Belgium",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-6118/",
doi = "10.18653/v1/W18-6118",
pages = "132--136",
abstract = "In this paper, we provide a lexical comparative analysis of the vocabulary used by customers and agents in an Enterprise Resource Planning (ERP) environment and a potential solution to clean the data and extract relevant content for NLP. As a result, we demonstrate that the actual vocabulary for the language that prevails in the ERP conversations is highly divergent from the standardized dictionary and further different from general language usage as extracted from the Common Crawl corpus. Moreover, in specific business communication circumstances, where it is expected to observe a high usage of standardized language, code switching and non-standard expression are predominant, emphasizing once more the discrepancy between the day-to-day use of language and the standardized one."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nisioi-etal-2018-content">
<titleInfo>
<title>Content Extraction and Lexical Analysis from Customer-Agent Interactions</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sergiu</namePart>
<namePart type="family">Nisioi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anca</namePart>
<namePart type="family">Bucur</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liviu</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Dinu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2018 EMNLP Workshop W-NUT: The 4th Workshop on Noisy User-generated Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Ritter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tim</namePart>
<namePart type="family">Baldwin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Afshin</namePart>
<namePart type="family">Rahimi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Brussels, Belgium</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we provide a lexical comparative analysis of the vocabulary used by customers and agents in an Enterprise Resource Planning (ERP) environment and a potential solution to clean the data and extract relevant content for NLP. As a result, we demonstrate that the actual vocabulary for the language that prevails in the ERP conversations is highly divergent from the standardized dictionary and further different from general language usage as extracted from the Common Crawl corpus. Moreover, in specific business communication circumstances, where it is expected to observe a high usage of standardized language, code switching and non-standard expression are predominant, emphasizing once more the discrepancy between the day-to-day use of language and the standardized one.</abstract>
<identifier type="citekey">nisioi-etal-2018-content</identifier>
<identifier type="doi">10.18653/v1/W18-6118</identifier>
<location>
<url>https://aclanthology.org/W18-6118/</url>
</location>
<part>
<date>2018-11</date>
<extent unit="page">
<start>132</start>
<end>136</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Content Extraction and Lexical Analysis from Customer-Agent Interactions
%A Nisioi, Sergiu
%A Bucur, Anca
%A Dinu, Liviu P.
%Y Xu, Wei
%Y Ritter, Alan
%Y Baldwin, Tim
%Y Rahimi, Afshin
%S Proceedings of the 2018 EMNLP Workshop W-NUT: The 4th Workshop on Noisy User-generated Text
%D 2018
%8 November
%I Association for Computational Linguistics
%C Brussels, Belgium
%F nisioi-etal-2018-content
%X In this paper, we provide a lexical comparative analysis of the vocabulary used by customers and agents in an Enterprise Resource Planning (ERP) environment and a potential solution to clean the data and extract relevant content for NLP. As a result, we demonstrate that the actual vocabulary for the language that prevails in the ERP conversations is highly divergent from the standardized dictionary and further different from general language usage as extracted from the Common Crawl corpus. Moreover, in specific business communication circumstances, where it is expected to observe a high usage of standardized language, code switching and non-standard expression are predominant, emphasizing once more the discrepancy between the day-to-day use of language and the standardized one.
%R 10.18653/v1/W18-6118
%U https://aclanthology.org/W18-6118/
%U https://doi.org/10.18653/v1/W18-6118
%P 132-136
Markdown (Informal)
[Content Extraction and Lexical Analysis from Customer-Agent Interactions](https://aclanthology.org/W18-6118/) (Nisioi et al., WNUT 2018)
ACL