@comment{ACL Anthology record. The url path carries the same Anthology ID as the doi suffix (2024.acl-long.750).}
@inproceedings{hanneman-etal-2024-impacts,
  title     = {Impacts of Misspelled Queries on Translation and Product Search},
  author    = {Hanneman, Greg and
               Monaikul, Natawut and
               Nakatani, Taichi},
  editor    = {Ku, Lun-Wei and
               Martins, Andre and
               Srikumar, Vivek},
  booktitle = {Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = aug,
  year      = {2024},
  address   = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.acl-long.750/},
  doi       = {10.18653/v1/2024.acl-long.750},
  pages     = {13907--13920},
  abstract  = {Machine translation is used in e-commerce to translate second-language queries into the primary language of the store, to be matched by the search system against the product catalog. However, many queries contain spelling mistakes. We first present an analysis of the spelling-robustness of a population of MT systems, quantifying how spelling variations affect MT output, the list of returned products, and ultimately user behavior. We then present two sets of practical experiments illustrating how spelling-robustness may be specifically improved. For MT, reducing the number of BPE operations significantly improves spelling-robustness in six language pairs. In end-to-end e-commerce, the inclusion of a dedicated spelling correction model, and the augmentation of that model's training data with language-relevant phenomena, each improve robustness and consistency of search results.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hanneman-etal-2024-impacts">
<titleInfo>
<title>Impacts of Misspelled Queries on Translation and Product Search</title>
</titleInfo>
<name type="personal">
<namePart type="given">Greg</namePart>
<namePart type="family">Hanneman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Natawut</namePart>
<namePart type="family">Monaikul</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Taichi</namePart>
<namePart type="family">Nakatani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Srikumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Machine translation is used in e-commerce to translate second-language queries into the primary language of the store, to be matched by the search system against the product catalog. However, many queries contain spelling mistakes. We first present an analysis of the spelling-robustness of a population of MT systems, quantifying how spelling variations affect MT output, the list of returned products, and ultimately user behavior. We then present two sets of practical experiments illustrating how spelling-robustness may be specifically improved. For MT, reducing the number of BPE operations significantly improves spelling-robustness in six language pairs. In end-to-end e-commerce, the inclusion of a dedicated spelling correction model, and the augmentation of that model’s training data with language-relevant phenomena, each improve robustness and consistency of search results.</abstract>
<identifier type="citekey">hanneman-etal-2024-impacts</identifier>
<identifier type="doi">10.18653/v1/2024.acl-long.750</identifier>
<location>
<url>https://aclanthology.org/2024.acl-long.750/</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>13907</start>
<end>13920</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Impacts of Misspelled Queries on Translation and Product Search
%A Hanneman, Greg
%A Monaikul, Natawut
%A Nakatani, Taichi
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F hanneman-etal-2024-impacts
%X Machine translation is used in e-commerce to translate second-language queries into the primary language of the store, to be matched by the search system against the product catalog. However, many queries contain spelling mistakes. We first present an analysis of the spelling-robustness of a population of MT systems, quantifying how spelling variations affect MT output, the list of returned products, and ultimately user behavior. We then present two sets of practical experiments illustrating how spelling-robustness may be specifically improved. For MT, reducing the number of BPE operations significantly improves spelling-robustness in six language pairs. In end-to-end e-commerce, the inclusion of a dedicated spelling correction model, and the augmentation of that model’s training data with language-relevant phenomena, each improve robustness and consistency of search results.
%R 10.18653/v1/2024.acl-long.750
%U https://aclanthology.org/2024.acl-long.750/
%U https://doi.org/10.18653/v1/2024.acl-long.750
%P 13907-13920
Markdown (Informal)
[Impacts of Misspelled Queries on Translation and Product Search](https://aclanthology.org/2024.acl-long.750/) (Hanneman et al., ACL 2024)
ACL
- Greg Hanneman, Natawut Monaikul, and Taichi Nakatani. 2024. Impacts of Misspelled Queries on Translation and Product Search. In Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 13907–13920, Bangkok, Thailand. Association for Computational Linguistics.