@inproceedings{huang-etal-2025-large,
title = "Can Large Language Models Understand {I}nternet Buzzwords Through User-Generated Content",
author = "Huang, Chen and
Luo, Junkai and
Wang, Xinzuo and
Lei, Wenqiang and
Lv, Jiancheng",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-long.632/",
doi = "10.18653/v1/2025.acl-long.632",
pages = "12916--12941",
ISBN = "979-8-89176-251-0",
abstract = "The massive user-generated content (UGC) available in Chinese social media is giving rise to the possibility of studying internet buzzwords. In this paper, we study if large language models (LLMs) can generate accurate definitions for these buzzwords based on UGC as examples. Our work serves a threefold contribution. First, we introduce CHEER, the first dataset of Chinese internet buzzwords, each annotated with a definition and relevant UGC. Second, we propose a novel method, called RESS, to effectively steer the comprehending process of LLMs to produce more accurate buzzword definitions, mirroring the skills of human language learning. Third, with CHEER, we benchmark the strengths and weaknesses of various off-the-shelf definition generation methods and our RESS. Our benchmark demonstrates the effectiveness of RESS while revealing a crucial shared challenge: comprehending unseen buzzwords and leveraging sufficient, high-quality UGC to facilitate this comprehension. In this paper, we believe our work lays the groundwork for future advancements in LLM-based definition generation. Our dataset and code will be openly released."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="huang-etal-2025-large">
<titleInfo>
<title>Can Large Language Models Understand Internet Buzzwords Through User-Generated Content</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chen</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junkai</namePart>
<namePart type="family">Luo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xinzuo</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wenqiang</namePart>
<namePart type="family">Lei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiancheng</namePart>
<namePart type="family">Lv</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-251-0</identifier>
</relatedItem>
<abstract>The massive user-generated content (UGC) available on Chinese social media makes it possible to study internet buzzwords. In this paper, we study whether large language models (LLMs) can generate accurate definitions for these buzzwords using UGC as examples. Our work makes a threefold contribution. First, we introduce CHEER, the first dataset of Chinese internet buzzwords, each annotated with a definition and relevant UGC. Second, we propose a novel method, called RESS, that steers the comprehension process of LLMs to produce more accurate buzzword definitions, mirroring how humans learn language. Third, using CHEER, we benchmark the strengths and weaknesses of various off-the-shelf definition generation methods and our RESS. Our benchmark demonstrates the effectiveness of RESS while revealing a crucial shared challenge: comprehending unseen buzzwords and leveraging sufficient, high-quality UGC to support this comprehension. We believe our work lays the groundwork for future advancements in LLM-based definition generation. Our dataset and code will be openly released.</abstract>
<identifier type="citekey">huang-etal-2025-large</identifier>
<identifier type="doi">10.18653/v1/2025.acl-long.632</identifier>
<location>
<url>https://aclanthology.org/2025.acl-long.632/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>12916</start>
<end>12941</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Can Large Language Models Understand Internet Buzzwords Through User-Generated Content
%A Huang, Chen
%A Luo, Junkai
%A Wang, Xinzuo
%A Lei, Wenqiang
%A Lv, Jiancheng
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F huang-etal-2025-large
%X The massive user-generated content (UGC) available on Chinese social media makes it possible to study internet buzzwords. In this paper, we study whether large language models (LLMs) can generate accurate definitions for these buzzwords using UGC as examples. Our work makes a threefold contribution. First, we introduce CHEER, the first dataset of Chinese internet buzzwords, each annotated with a definition and relevant UGC. Second, we propose a novel method, called RESS, that steers the comprehension process of LLMs to produce more accurate buzzword definitions, mirroring how humans learn language. Third, using CHEER, we benchmark the strengths and weaknesses of various off-the-shelf definition generation methods and our RESS. Our benchmark demonstrates the effectiveness of RESS while revealing a crucial shared challenge: comprehending unseen buzzwords and leveraging sufficient, high-quality UGC to support this comprehension. We believe our work lays the groundwork for future advancements in LLM-based definition generation. Our dataset and code will be openly released.
%R 10.18653/v1/2025.acl-long.632
%U https://aclanthology.org/2025.acl-long.632/
%U https://doi.org/10.18653/v1/2025.acl-long.632
%P 12916-12941
Markdown (Informal)
[Can Large Language Models Understand Internet Buzzwords Through User-Generated Content](https://aclanthology.org/2025.acl-long.632/) (Huang et al., ACL 2025)
ACL
Chen Huang, Junkai Luo, Xinzuo Wang, Wenqiang Lei, and Jiancheng Lv. 2025. [Can Large Language Models Understand Internet Buzzwords Through User-Generated Content](https://aclanthology.org/2025.acl-long.632/). In *Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)*, pages 12916–12941, Vienna, Austria. Association for Computational Linguistics.