@inproceedings{ueffing-etal-2018-quality,
title = "Quality Estimation for Automatically Generated Titles of e{C}ommerce Browse Pages",
author = "Ueffing, Nicola and
C. de Souza, Jos{\'e} G. and
Leusch, Gregor",
editor = "Bangalore, Srinivas and
Chu-Carroll, Jennifer and
Li, Yunyao",
booktitle = "Proceedings of the 2018 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 3 (Industry Papers)",
month = jun,
year = "2018",
address = "New Orleans - Louisiana",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/N18-3007/",
doi = "10.18653/v1/N18-3007",
pages = "52--59",
abstract = "At eBay, we are automatically generating a large amount of natural language titles for eCommerce browse pages using machine translation (MT) technology. While automatic approaches can generate millions of titles very fast, they are prone to errors. We therefore develop quality estimation (QE) methods which can automatically detect titles with low quality in order to prevent them from going live. In this paper, we present different approaches: The first one is a Random Forest (RF) model that explores hand-crafted, robust features, which are a mix of established features commonly used in Machine Translation Quality Estimation (MTQE) and new features developed specifically for our task. The second model is based on Siamese Networks (SNs) which embed the metadata input sequence and the generated title in the same space and do not require hand-crafted features at all. We thoroughly evaluate and compare those approaches on in-house data. While the RF models are competitive for scenarios with smaller amounts of training data and somewhat more robust, they are clearly outperformed by the SN models when the amount of training data is larger."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ueffing-etal-2018-quality">
<titleInfo>
<title>Quality Estimation for Automatically Generated Titles of eCommerce Browse Pages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicola</namePart>
<namePart type="family">Ueffing</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">José</namePart>
<namePart type="given">G</namePart>
<namePart type="family">C. de Souza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gregor</namePart>
<namePart type="family">Leusch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 3 (Industry Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Srinivas</namePart>
<namePart type="family">Bangalore</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jennifer</namePart>
<namePart type="family">Chu-Carroll</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yunyao</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">New Orleans - Louisiana</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>At eBay, we are automatically generating a large amount of natural language titles for eCommerce browse pages using machine translation (MT) technology. While automatic approaches can generate millions of titles very fast, they are prone to errors. We therefore develop quality estimation (QE) methods which can automatically detect titles with low quality in order to prevent them from going live. In this paper, we present different approaches: The first one is a Random Forest (RF) model that explores hand-crafted, robust features, which are a mix of established features commonly used in Machine Translation Quality Estimation (MTQE) and new features developed specifically for our task. The second model is based on Siamese Networks (SNs) which embed the metadata input sequence and the generated title in the same space and do not require hand-crafted features at all. We thoroughly evaluate and compare those approaches on in-house data. While the RF models are competitive for scenarios with smaller amounts of training data and somewhat more robust, they are clearly outperformed by the SN models when the amount of training data is larger.</abstract>
<identifier type="citekey">ueffing-etal-2018-quality</identifier>
<identifier type="doi">10.18653/v1/N18-3007</identifier>
<location>
<url>https://aclanthology.org/N18-3007/</url>
</location>
<part>
<date>2018-06</date>
<extent unit="page">
<start>52</start>
<end>59</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Quality Estimation for Automatically Generated Titles of eCommerce Browse Pages
%A Ueffing, Nicola
%A C. de Souza, José G.
%A Leusch, Gregor
%Y Bangalore, Srinivas
%Y Chu-Carroll, Jennifer
%Y Li, Yunyao
%S Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 3 (Industry Papers)
%D 2018
%8 June
%I Association for Computational Linguistics
%C New Orleans - Louisiana
%F ueffing-etal-2018-quality
%X At eBay, we are automatically generating a large amount of natural language titles for eCommerce browse pages using machine translation (MT) technology. While automatic approaches can generate millions of titles very fast, they are prone to errors. We therefore develop quality estimation (QE) methods which can automatically detect titles with low quality in order to prevent them from going live. In this paper, we present different approaches: The first one is a Random Forest (RF) model that explores hand-crafted, robust features, which are a mix of established features commonly used in Machine Translation Quality Estimation (MTQE) and new features developed specifically for our task. The second model is based on Siamese Networks (SNs) which embed the metadata input sequence and the generated title in the same space and do not require hand-crafted features at all. We thoroughly evaluate and compare those approaches on in-house data. While the RF models are competitive for scenarios with smaller amounts of training data and somewhat more robust, they are clearly outperformed by the SN models when the amount of training data is larger.
%R 10.18653/v1/N18-3007
%U https://aclanthology.org/N18-3007/
%U https://doi.org/10.18653/v1/N18-3007
%P 52-59
Markdown (Informal)
[Quality Estimation for Automatically Generated Titles of eCommerce Browse Pages](https://aclanthology.org/N18-3007/) (Ueffing et al., NAACL 2018)
ACL
- Nicola Ueffing, José G. C. de Souza, and Gregor Leusch. 2018. Quality Estimation for Automatically Generated Titles of eCommerce Browse Pages. In Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 3 (Industry Papers), pages 52–59, New Orleans - Louisiana. Association for Computational Linguistics.