@inproceedings{tasawong-etal-2023-typo,
title = "Typo-Robust Representation Learning for Dense Retrieval",
author = "Tasawong, Panuthep and
Ponwitayarat, Wuttikorn and
Limkonchotiwat, Peerat and
Udomcharoenchaikit, Can and
Chuangsuwanich, Ekapol and
Nutanong, Sarana",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.acl-short.95",
doi = "10.18653/v1/2023.acl-short.95",
pages = "1106--1115",
abstract = "Dense retrieval is a basic building block of information retrieval applications. One of the main challenges of dense retrieval in real-world settings is the handling of queries containing misspelled words. A popular approach for handling misspelled queries is minimizing the representations discrepancy between misspelled queries and their pristine ones. Unlike the existing approaches, which only focus on the alignment between misspelled and pristine queries, our method also improves the contrast between each misspelled query and its surrounding queries. To assess the effectiveness of our proposed method, we compare it against the existing competitors using two benchmark datasets and two base encoders. Our method outperforms the competitors in all cases with misspelled queries. Our code and models are available at \url{https://github.com/panuthept/DST-DenseRetrieval}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tasawong-etal-2023-typo">
<titleInfo>
<title>Typo-Robust Representation Learning for Dense Retrieval</title>
</titleInfo>
<name type="personal">
<namePart type="given">Panuthep</namePart>
<namePart type="family">Tasawong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wuttikorn</namePart>
<namePart type="family">Ponwitayarat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peerat</namePart>
<namePart type="family">Limkonchotiwat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Can</namePart>
<namePart type="family">Udomcharoenchaikit</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekapol</namePart>
<namePart type="family">Chuangsuwanich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sarana</namePart>
<namePart type="family">Nutanong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Boyd-Graber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naoaki</namePart>
<namePart type="family">Okazaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Dense retrieval is a basic building block of information retrieval applications. One of the main challenges of dense retrieval in real-world settings is the handling of queries containing misspelled words. A popular approach for handling misspelled queries is minimizing the representations discrepancy between misspelled queries and their pristine ones. Unlike the existing approaches, which only focus on the alignment between misspelled and pristine queries, our method also improves the contrast between each misspelled query and its surrounding queries. To assess the effectiveness of our proposed method, we compare it against the existing competitors using two benchmark datasets and two base encoders. Our method outperforms the competitors in all cases with misspelled queries. Our code and models are available at https://github.com/panuthept/DST-DenseRetrieval.</abstract>
<identifier type="citekey">tasawong-etal-2023-typo</identifier>
<identifier type="doi">10.18653/v1/2023.acl-short.95</identifier>
<location>
<url>https://aclanthology.org/2023.acl-short.95</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>1106</start>
<end>1115</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Typo-Robust Representation Learning for Dense Retrieval
%A Tasawong, Panuthep
%A Ponwitayarat, Wuttikorn
%A Limkonchotiwat, Peerat
%A Udomcharoenchaikit, Can
%A Chuangsuwanich, Ekapol
%A Nutanong, Sarana
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F tasawong-etal-2023-typo
%X Dense retrieval is a basic building block of information retrieval applications. One of the main challenges of dense retrieval in real-world settings is the handling of queries containing misspelled words. A popular approach for handling misspelled queries is minimizing the representations discrepancy between misspelled queries and their pristine ones. Unlike the existing approaches, which only focus on the alignment between misspelled and pristine queries, our method also improves the contrast between each misspelled query and its surrounding queries. To assess the effectiveness of our proposed method, we compare it against the existing competitors using two benchmark datasets and two base encoders. Our method outperforms the competitors in all cases with misspelled queries. Our code and models are available at https://github.com/panuthept/DST-DenseRetrieval.
%R 10.18653/v1/2023.acl-short.95
%U https://aclanthology.org/2023.acl-short.95
%U https://doi.org/10.18653/v1/2023.acl-short.95
%P 1106-1115
Markdown (Informal)
[Typo-Robust Representation Learning for Dense Retrieval](https://aclanthology.org/2023.acl-short.95) (Tasawong et al., ACL 2023)
ACL
- Panuthep Tasawong, Wuttikorn Ponwitayarat, Peerat Limkonchotiwat, Can Udomcharoenchaikit, Ekapol Chuangsuwanich, and Sarana Nutanong. 2023. Typo-Robust Representation Learning for Dense Retrieval. In Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers), pages 1106–1115, Toronto, Canada. Association for Computational Linguistics.