% ACL Anthology record for the GAVEL paper (KnowledgeNLP workshop proceedings, 2025).
% Acronyms in the title are brace-protected so that styles applying sentence
% casing keep them capitalised.
@inproceedings{hongwimol-etal-2025-gavel,
  title     = {{GAVEL}: Generative Attribute-Value Extraction Using {LLM}s on {LLM}-Augmented Datasets},
  author    = {Hongwimol, Pollawat and
               Sheng, Dong and
               Zhang, Li and
               Liu, Kai and
               Wang, Xiufei},
  editor    = {Shi, Weijia and
               Yu, Wenhao and
               Asai, Akari and
               Jiang, Meng and
               Durrett, Greg and
               Hajishirzi, Hannaneh and
               Zettlemoyer, Luke},
  booktitle = {Proceedings of the 4th International Workshop on Knowledge-Augmented Methods for Natural Language Processing},
  month     = may,
  year      = {2025},
  address   = {Albuquerque, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.knowledgenlp-1.6/},
  doi       = {10.18653/v1/2025.knowledgenlp-1.6},
  pages     = {81--90},
  isbn      = {979-8-89176-229-9},
  abstract  = {In the evolving e-commerce landscape, accurate product attribute-value extraction is crucial for enhancing user experience and increasing sales. This paper introduces GAVEL, a generative approach leveraging large language models (LLMs) to augment training data for attribute extraction from diverse textual sources. Our method extracts over 1,000 unique attributes across 2,000 product categories in multiple Southeast Asian languages, including Thai, Vietnamese, and Indonesian. Rigorous evaluations show significant improvements in accuracy and coverage compared to seller-provided attributes, with enhanced recall and F1 scores. Additionally, GAVEL reduces operational costs by minimizing instruction token usage and improves inference speed. The results of the A/B testing indicate that our model has a positive impact on Gross Merchandise Value (GMV) per page view (PV) across all three operating countries. This research highlights the potential of generative techniques for optimizing attribute extraction in multi-language e-commerce applications.}
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 description of the same paper: authors, host proceedings with its editors, identifiers and page extent. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="hongwimol-etal-2025-gavel">
    <titleInfo>
      <title>GAVEL: Generative Attribute-Value Extraction Using LLMs on LLM-Augmented Datasets</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Pollawat</namePart>
      <namePart type="family">Hongwimol</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Dong</namePart>
      <namePart type="family">Sheng</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Li</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kai</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xiufei</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 4th International Workshop on Knowledge-Augmented Methods for Natural Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Weijia</namePart>
        <namePart type="family">Shi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Wenhao</namePart>
        <namePart type="family">Yu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Akari</namePart>
        <namePart type="family">Asai</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Meng</namePart>
        <namePart type="family">Jiang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Greg</namePart>
        <namePart type="family">Durrett</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hannaneh</namePart>
        <namePart type="family">Hajishirzi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Luke</namePart>
        <namePart type="family">Zettlemoyer</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Albuquerque, New Mexico, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-229-9</identifier>
    </relatedItem>
    <abstract>In the evolving e-commerce landscape, accurate product attribute-value extraction is crucial for enhancing user experience and increasing sales. This paper introduces GAVEL, a generative approach leveraging large language models (LLMs) to augment training data for attribute extraction from diverse textual sources. Our method extracts over 1,000 unique attributes across 2,000 product categories in multiple Southeast Asian languages, including Thai, Vietnamese, and Indonesian. Rigorous evaluations show significant improvements in accuracy and coverage compared to seller-provided attributes, with enhanced recall and F1 scores. Additionally, GAVEL reduces operational costs by minimizing instruction token usage and improves inference speed. The results of the A/B testing indicate that our model has a positive impact on Gross Merchandise Value (GMV) per page view (PV) across all three operating countries. This research highlights the potential of generative techniques for optimizing attribute extraction in multi-language e-commerce applications.</abstract>
    <identifier type="citekey">hongwimol-etal-2025-gavel</identifier>
    <identifier type="doi">10.18653/v1/2025.knowledgenlp-1.6</identifier>
    <location>
      <url>https://aclanthology.org/2025.knowledgenlp-1.6/</url>
    </location>
    <part>
      <date>2025-05</date>
      <extent unit="page">
        <start>81</start>
        <end>90</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T GAVEL: Generative Attribute-Value Extraction Using LLMs on LLM-Augmented Datasets
%A Hongwimol, Pollawat
%A Sheng, Dong
%A Zhang, Li
%A Liu, Kai
%A Wang, Xiufei
%Y Shi, Weijia
%Y Yu, Wenhao
%Y Asai, Akari
%Y Jiang, Meng
%Y Durrett, Greg
%Y Hajishirzi, Hannaneh
%Y Zettlemoyer, Luke
%S Proceedings of the 4th International Workshop on Knowledge-Augmented Methods for Natural Language Processing
%D 2025
%8 May
%I Association for Computational Linguistics
%C Albuquerque, New Mexico, USA
%@ 979-8-89176-229-9
%F hongwimol-etal-2025-gavel
%X In the evolving e-commerce landscape, accurate product attribute-value extraction is crucial for enhancing user experience and increasing sales. This paper introduces GAVEL, a generative approach leveraging large language models (LLMs) to augment training data for attribute extraction from diverse textual sources. Our method extracts over 1,000 unique attributes across 2,000 product categories in multiple Southeast Asian languages, including Thai, Vietnamese, and Indonesian. Rigorous evaluations show significant improvements in accuracy and coverage compared to seller-provided attributes, with enhanced recall and F1 scores. Additionally, GAVEL reduces operational costs by minimizing instruction token usage and improves inference speed. The results of the A/B testing indicate that our model has a positive impact on Gross Merchandise Value (GMV) per page view (PV) across all three operating countries. This research highlights the potential of generative techniques for optimizing attribute extraction in multi-language e-commerce applications.
%R 10.18653/v1/2025.knowledgenlp-1.6
%U https://aclanthology.org/2025.knowledgenlp-1.6/
%U https://doi.org/10.18653/v1/2025.knowledgenlp-1.6
%P 81-90
Markdown (Informal)
[GAVEL: Generative Attribute-Value Extraction Using LLMs on LLM-Augmented Datasets](https://aclanthology.org/2025.knowledgenlp-1.6/) (Hongwimol et al., KnowledgeNLP 2025)
ACL