% Conference-paper record. Non-ASCII letters in personal names are stored as
% UTF-8 throughout, so process this file with a UTF-8-aware toolchain
% (e.g. Biber, or BibTeX with a UTF-8 input encoding on the LaTeX side).
@inproceedings{latif-etal-2025-restaurant,
  title     = {Restaurant Menu Categorization at Scale: {LLM}-Guided Hybrid Clustering},
  author    = {Latif, Seemab and
               Mehmood, Ashar and
               Turki, Selim and
               Ameer, Huma and
               Gorban, Ivan and
               Fateh, Faysal},
  editor    = {Flek, Lucie and
               Narayan, Shashi and
               Phương, Lê Hồng and
               Pei, Jiahuan},
  booktitle = {Proceedings of the 18th International Natural Language Generation Conference},
  month     = oct,
  year      = {2025},
  address   = {Hanoi, Vietnam},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.inlg-main.31/},
  pages     = {511--521},
  abstract  = {Inconsistent naming of menu items across merchants presents a major challenge for businesses that rely on large-scale menu item catalogs. It hinders downstream tasks like pricing analysis, menu item deduplication, and recommendations. To address this, we propose the Cross-Platform Semantic Alignment Framework (CPSAF), a hybrid approach that integrates DBSCAN-based clustering with SIGMA (Semantic Item Grouping and Menu Abstraction), a Large Language Model based refinement module. SIGMA employs in-context learning with a large language model to generate generic menu item names and categories. We evaluate our framework on a proprietary dataset comprising over 700,000 unique menu items. Experiments involve tuning DBSCAN parameters and applying SIGMA to refine clusters. The performance is assessed using both structural metrics i.e. cluster count, coverage and semantic metrics i.e. intra and inter-cluster similarity along with manual qualitative inspection. CPSAF improves intra-cluster similarity from 0.88 to 0.98 and reduces singleton clusters by 33{\%}, demonstrating its effectiveness in recovering soft semantic drift.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<!-- MODS v3 collection holding a single bibliographic record; the record ID below matches the citekey identifier near the end of the record. -->
<mods ID="latif-etal-2025-restaurant">
<titleInfo>
<title>Restaurant Menu Categorization at Scale: LLM-Guided Hybrid Clustering</title>
</titleInfo>
<!-- Authors, one personal name element each, in byline order. -->
<name type="personal">
<namePart type="given">Seemab</namePart>
<namePart type="family">Latif</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ashar</namePart>
<namePart type="family">Mehmood</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Selim</namePart>
<namePart type="family">Turki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Huma</namePart>
<namePart type="family">Ameer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="family">Gorban</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Faysal</namePart>
<namePart type="family">Fateh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Host item: the proceedings volume this paper appears in, with its editors, publisher and place. -->
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th International Natural Language Generation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lucie</namePart>
<namePart type="family">Flek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shashi</namePart>
<namePart type="family">Narayan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lê</namePart>
<namePart type="given">Hồng</namePart>
<namePart type="family">Phương</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiahuan</namePart>
<namePart type="family">Pei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hanoi, Vietnam</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Inconsistent naming of menu items across merchants presents a major challenge for businesses that rely on large-scale menu item catalogs. It hinders downstream tasks like pricing analysis, menu item deduplication, and recommendations. To address this, we propose the Cross-Platform Semantic Alignment Framework (CPSAF), a hybrid approach that integrates DBSCAN-based clustering with SIGMA (Semantic Item Grouping and Menu Abstraction), a Large Language Model based refinement module. SIGMA employs in-context learning with a large language model to generate generic menu item names and categories. We evaluate our framework on a proprietary dataset comprising over 700,000 unique menu items. Experiments involve tuning DBSCAN parameters and applying SIGMA to refine clusters. The performance is assessed using both structural metrics i.e. cluster count, coverage and semantic metrics i.e. intra and inter-cluster similarity along with manual qualitative inspection. CPSAF improves intra-cluster similarity from 0.88 to 0.98 and reduces singleton clusters by 33%, demonstrating its effectiveness in recovering soft semantic drift.</abstract>
<identifier type="citekey">latif-etal-2025-restaurant</identifier>
<location>
<url>https://aclanthology.org/2025.inlg-main.31/</url>
</location>
<!-- Position of the paper within the host item: date and page extent. -->
<part>
<date>2025-10</date>
<extent unit="page">
<start>511</start>
<end>521</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Restaurant Menu Categorization at Scale: LLM-Guided Hybrid Clustering
%A Latif, Seemab
%A Mehmood, Ashar
%A Turki, Selim
%A Ameer, Huma
%A Gorban, Ivan
%A Fateh, Faysal
%Y Flek, Lucie
%Y Narayan, Shashi
%Y Phương, Lê Hồng
%Y Pei, Jiahuan
%S Proceedings of the 18th International Natural Language Generation Conference
%D 2025
%8 October
%I Association for Computational Linguistics
%C Hanoi, Vietnam
%F latif-etal-2025-restaurant
%X Inconsistent naming of menu items across merchants presents a major challenge for businesses that rely on large-scale menu item catalogs. It hinders downstream tasks like pricing analysis, menu item deduplication, and recommendations. To address this, we propose the Cross-Platform Semantic Alignment Framework (CPSAF), a hybrid approach that integrates DBSCAN-based clustering with SIGMA (Semantic Item Grouping and Menu Abstraction), a Large Language Model based refinement module. SIGMA employs in-context learning with a large language model to generate generic menu item names and categories. We evaluate our framework on a proprietary dataset comprising over 700,000 unique menu items. Experiments involve tuning DBSCAN parameters and applying SIGMA to refine clusters. The performance is assessed using both structural metrics i.e. cluster count, coverage and semantic metrics i.e. intra and inter-cluster similarity along with manual qualitative inspection. CPSAF improves intra-cluster similarity from 0.88 to 0.98 and reduces singleton clusters by 33%, demonstrating its effectiveness in recovering soft semantic drift.
%U https://aclanthology.org/2025.inlg-main.31/
%P 511-521
Markdown (Informal)
[Restaurant Menu Categorization at Scale: LLM-Guided Hybrid Clustering](https://aclanthology.org/2025.inlg-main.31/) (Latif et al., INLG 2025)
ACL