@inproceedings{ganu-p-2018-fast,
title = "Fast Query Expansion on an Accounting Corpus using Sub-Word Embeddings",
author = "Ganu, Hrishikesh and
P., Viswa Datha",
editor = {Faruqui, Manaal and
Sch{\"u}tze, Hinrich and
Trancoso, Isabel and
Tsvetkov, Yulia and
Yaghoobzadeh, Yadollah},
booktitle = "Proceedings of the Second Workshop on Subword/Character {LE}vel Models",
month = jun,
year = "2018",
address = "New Orleans",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-1208",
doi = "10.18653/v1/W18-1208",
pages = "61--65",
abstract = "We present early results from a system under development which uses sub-word embeddings for query expansion in presence of mis-spelled words and other aberrations. We work for a company which creates accounting software and the end goal is to improve customer experience when they search for help on our {``}Customer Care{''} portal. Our customers use colloquial language, non-standard acronyms and sometimes mis-spell words when they use our Search portal or interact over other channels. However, our Knowledge Base has curated content which leverages technical terms and is in language which is quite formal. This results in the answer not being retrieved even though the answer might actually be present in the documentation (as assessed by a human). We address this problem by creating equivalence classes of words with similar meanings (with the additional property that the mappings to these equivalence classes are robust to mis-spellings) using sub-word embeddings and then use them to fine tune an Elasticsearch index to improve recall. We demonstrate through an end-end system that using sub-word embeddings leads to a significant lift in correct answers retrieved for an accounting corpus available in the public domain.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ganu-p-2018-fast">
<titleInfo>
<title>Fast Query Expansion on an Accounting Corpus using Sub-Word Embeddings</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hrishikesh</namePart>
<namePart type="family">Ganu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viswa</namePart>
<namePart type="given">Datha</namePart>
<namePart type="family">P.</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Subword/Character LEvel Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Manaal</namePart>
<namePart type="family">Faruqui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hinrich</namePart>
<namePart type="family">Schütze</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isabel</namePart>
<namePart type="family">Trancoso</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yulia</namePart>
<namePart type="family">Tsvetkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yadollah</namePart>
<namePart type="family">Yaghoobzadeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">New Orleans</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present early results from a system under development which uses sub-word embeddings for query expansion in presence of mis-spelled words and other aberrations. We work for a company which creates accounting software and the end goal is to improve customer experience when they search for help on our “Customer Care” portal. Our customers use colloquial language, non-standard acronyms and sometimes mis-spell words when they use our Search portal or interact over other channels. However, our Knowledge Base has curated content which leverages technical terms and is in language which is quite formal. This results in the answer not being retrieved even though the answer might actually be present in the documentation (as assessed by a human). We address this problem by creating equivalence classes of words with similar meanings (with the additional property that the mappings to these equivalence classes are robust to mis-spellings) using sub-word embeddings and then use them to fine tune an Elasticsearch index to improve recall. We demonstrate through an end-end system that using sub-word embeddings leads to a significant lift in correct answers retrieved for an accounting corpus available in the public domain.</abstract>
<identifier type="citekey">ganu-p-2018-fast</identifier>
<identifier type="doi">10.18653/v1/W18-1208</identifier>
<location>
<url>https://aclanthology.org/W18-1208</url>
</location>
<part>
<date>2018-06</date>
<extent unit="page">
<start>61</start>
<end>65</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Fast Query Expansion on an Accounting Corpus using Sub-Word Embeddings
%A Ganu, Hrishikesh
%A P., Viswa Datha
%Y Faruqui, Manaal
%Y Schütze, Hinrich
%Y Trancoso, Isabel
%Y Tsvetkov, Yulia
%Y Yaghoobzadeh, Yadollah
%S Proceedings of the Second Workshop on Subword/Character LEvel Models
%D 2018
%8 June
%I Association for Computational Linguistics
%C New Orleans
%F ganu-p-2018-fast
%X We present early results from a system under development which uses sub-word embeddings for query expansion in presence of mis-spelled words and other aberrations. We work for a company which creates accounting software and the end goal is to improve customer experience when they search for help on our “Customer Care” portal. Our customers use colloquial language, non-standard acronyms and sometimes mis-spell words when they use our Search portal or interact over other channels. However, our Knowledge Base has curated content which leverages technical terms and is in language which is quite formal. This results in the answer not being retrieved even though the answer might actually be present in the documentation (as assessed by a human). We address this problem by creating equivalence classes of words with similar meanings (with the additional property that the mappings to these equivalence classes are robust to mis-spellings) using sub-word embeddings and then use them to fine tune an Elasticsearch index to improve recall. We demonstrate through an end-end system that using sub-word embeddings leads to a significant lift in correct answers retrieved for an accounting corpus available in the public domain.
%R 10.18653/v1/W18-1208
%U https://aclanthology.org/W18-1208
%U https://doi.org/10.18653/v1/W18-1208
%P 61-65
Markdown (Informal)
[Fast Query Expansion on an Accounting Corpus using Sub-Word Embeddings](https://aclanthology.org/W18-1208) (Ganu & P., SCLeM 2018)
ACL