@inproceedings{dumont-etal-2019-wrote,
title = "Who wrote this book? A challenge for e-commerce",
author = "Dumont, B{\'e}ranger and
Maggio, Simona and
Sidi Said, Ghiles and
Au, Quoc-Tien",
editor = "Xu, Wei and
Ritter, Alan and
Baldwin, Tim and
Rahimi, Afshin",
booktitle = "Proceedings of the 5th Workshop on Noisy User-generated Text (W-NUT 2019)",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D19-5516",
doi = "10.18653/v1/D19-5516",
pages = "121--125",
abstract = "Modern e-commerce catalogs contain millions of references, associated with textual and visual information that is of paramount importance for the products to be found via search or browsing. Of particular significance is the book category, where the author name(s) field poses a significant challenge. Indeed, books written by a given author might be listed with different authors{'} names due to abbreviations, spelling variants and mistakes, among others. To solve this problem at scale, we design a composite system involving open data sources for books, as well as deep learning components, such as approximate match with Siamese networks and name correction with sequence-to-sequence networks. We evaluate this approach on product data from the e-commerce website Rakuten France, and find that the top proposal of the system is the normalized author name with 72{\%} accuracy.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dumont-etal-2019-wrote">
<titleInfo>
<title>Who wrote this book? A challenge for e-commerce</title>
</titleInfo>
<name type="personal">
<namePart type="given">Béranger</namePart>
<namePart type="family">Dumont</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simona</namePart>
<namePart type="family">Maggio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ghiles</namePart>
<namePart type="family">Sidi Said</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Quoc-Tien</namePart>
<namePart type="family">Au</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 5th Workshop on Noisy User-generated Text (W-NUT 2019)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Ritter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tim</namePart>
<namePart type="family">Baldwin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Afshin</namePart>
<namePart type="family">Rahimi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Modern e-commerce catalogs contain millions of references, associated with textual and visual information that is of paramount importance for the products to be found via search or browsing. Of particular significance is the book category, where the author name(s) field poses a significant challenge. Indeed, books written by a given author might be listed with different authors’ names due to abbreviations, spelling variants and mistakes, among others. To solve this problem at scale, we design a composite system involving open data sources for books, as well as deep learning components, such as approximate match with Siamese networks and name correction with sequence-to-sequence networks. We evaluate this approach on product data from the e-commerce website Rakuten France, and find that the top proposal of the system is the normalized author name with 72% accuracy.</abstract>
<identifier type="citekey">dumont-etal-2019-wrote</identifier>
<identifier type="doi">10.18653/v1/D19-5516</identifier>
<location>
<url>https://aclanthology.org/D19-5516</url>
</location>
<part>
<date>2019-11</date>
<extent unit="page">
<start>121</start>
<end>125</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Who wrote this book? A challenge for e-commerce
%A Dumont, Béranger
%A Maggio, Simona
%A Sidi Said, Ghiles
%A Au, Quoc-Tien
%Y Xu, Wei
%Y Ritter, Alan
%Y Baldwin, Tim
%Y Rahimi, Afshin
%S Proceedings of the 5th Workshop on Noisy User-generated Text (W-NUT 2019)
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong, China
%F dumont-etal-2019-wrote
%X Modern e-commerce catalogs contain millions of references, associated with textual and visual information that is of paramount importance for the products to be found via search or browsing. Of particular significance is the book category, where the author name(s) field poses a significant challenge. Indeed, books written by a given author might be listed with different authors’ names due to abbreviations, spelling variants and mistakes, among others. To solve this problem at scale, we design a composite system involving open data sources for books, as well as deep learning components, such as approximate match with Siamese networks and name correction with sequence-to-sequence networks. We evaluate this approach on product data from the e-commerce website Rakuten France, and find that the top proposal of the system is the normalized author name with 72% accuracy.
%R 10.18653/v1/D19-5516
%U https://aclanthology.org/D19-5516
%U https://doi.org/10.18653/v1/D19-5516
%P 121-125
Markdown (Informal)
[Who wrote this book? A challenge for e-commerce](https://aclanthology.org/D19-5516) (Dumont et al., WNUT 2019)
ACL
- Béranger Dumont, Simona Maggio, Ghiles Sidi Said, and Quoc-Tien Au. 2019. Who wrote this book? A challenge for e-commerce. In Proceedings of the 5th Workshop on Noisy User-generated Text (W-NUT 2019), pages 121–125, Hong Kong, China. Association for Computational Linguistics.