% Conference paper in the Findings of ACL: EMNLP 2024 volume, introducing the
% CHAmbi benchmark for Chinese ambiguity handling by large language models.
% Case-sensitive words in the title are brace-protected as whole words so that
% sentence-casing bibliography styles keep their capitalisation intact.
% NOTE(review): `address` looks like the conference venue rather than the
% publisher's city -- confirm before using it as a publisher location.
@inproceedings{zhang-etal-2024-chambi,
    title     = {{CHAmbi}: A New Benchmark on {Chinese} Ambiguity Challenges for Large Language Models},
    author    = {Zhang, Qin and
                 Cai, Sihan and
                 Zhao, Jiaxu and
                 Pechenizkiy, Mykola and
                 Fang, Meng},
    editor    = {Al-Onaizan, Yaser and
                 Bansal, Mohit and
                 Chen, Yun-Nung},
    booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2024},
    month     = nov,
    year      = {2024},
    address   = {Miami, Florida, USA},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2024.findings-emnlp.875},
    pages     = {14883--14898},
    abstract  = {Ambiguity is an inherent feature of language, whose management is crucial for effective communication and collaboration. This is particularly true for Chinese, a language with extensive lexical-morphemic ambiguity. Despite the wide use of large language models (LLMs) in numerous domains and their growing proficiency in Chinese, there is a notable lack of datasets to thoroughly evaluate LLMs{'} ability to handle ambiguity in Chinese. To bridge this gap, we introduce the CHAmbi dataset, a specialized Chinese multi-label disambiguation dataset formatted in Natural Language Inference. It comprises 4,991 pairs of premises and hypotheses, including 824 examples featuring a wide range of ambiguities. In addition to the dataset, we develop a series of tests and conduct an extensive evaluation of pre-trained LLMs{'} proficiency in identifying and resolving ambiguity in the Chinese language. Our findings reveal that GPT-4 consistently delivers commendable performance across various evaluative measures, albeit with limitations in robustness. The performances of other LLMs, however, demonstrate variability in handling ambiguity-related tasks, underscoring the complexity of such tasks in the context of Chinese. The overall results highlight the challenge of ambiguity handling for current LLMs and underscore the imperative need for further enhancement in LLM capabilities for effective ambiguity resolution in the Chinese language.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for citation key zhang-etal-2024-chambi. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="zhang-etal-2024-chambi">
    <titleInfo>
      <title>CHAmbi: A New Benchmark on Chinese Ambiguity Challenges for Large Language Models</title>
    </titleInfo>
    <!-- Authors, in listed order. -->
    <name type="personal">
      <namePart type="given">Qin</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sihan</namePart>
      <namePart type="family">Cai</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jiaxu</namePart>
      <namePart type="family">Zhao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mykola</namePart>
      <namePart type="family">Pechenizkiy</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Meng</namePart>
      <namePart type="family">Fang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the containing volume, with its editors and publisher details. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yaser</namePart>
        <namePart type="family">Al-Onaizan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mohit</namePart>
        <namePart type="family">Bansal</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yun-Nung</namePart>
        <namePart type="family">Chen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Miami, Florida, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Ambiguity is an inherent feature of language, whose management is crucial for effective communication and collaboration. This is particularly true for Chinese, a language with extensive lexical-morphemic ambiguity. Despite the wide use of large language models (LLMs) in numerous domains and their growing proficiency in Chinese, there is a notable lack of datasets to thoroughly evaluate LLMs’ ability to handle ambiguity in Chinese. To bridge this gap, we introduce the CHAmbi dataset, a specialized Chinese multi-label disambiguation dataset formatted in Natural Language Inference. It comprises 4,991 pairs of premises and hypotheses, including 824 examples featuring a wide range of ambiguities. In addition to the dataset, we develop a series of tests and conduct an extensive evaluation of pre-trained LLMs’ proficiency in identifying and resolving ambiguity in the Chinese language. Our findings reveal that GPT-4 consistently delivers commendable performance across various evaluative measures, albeit with limitations in robustness. The performances of other LLMs, however, demonstrate variability in handling ambiguity-related tasks, underscoring the complexity of such tasks in the context of Chinese. The overall results highlight the challenge of ambiguity handling for current LLMs and underscore the imperative need for further enhancement in LLM capabilities for effective ambiguity resolution in the Chinese language.</abstract>
    <identifier type="citekey">zhang-etal-2024-chambi</identifier>
    <location>
      <url>https://aclanthology.org/2024.findings-emnlp.875</url>
    </location>
    <!-- Issue date and page extent of this paper. -->
    <part>
      <date>2024-11</date>
      <extent unit="page">
        <start>14883</start>
        <end>14898</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T CHAmbi: A New Benchmark on Chinese Ambiguity Challenges for Large Language Models
%A Zhang, Qin
%A Cai, Sihan
%A Zhao, Jiaxu
%A Pechenizkiy, Mykola
%A Fang, Meng
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F zhang-etal-2024-chambi
%X Ambiguity is an inherent feature of language, whose management is crucial for effective communication and collaboration. This is particularly true for Chinese, a language with extensive lexical-morphemic ambiguity. Despite the wide use of large language models (LLMs) in numerous domains and their growing proficiency in Chinese, there is a notable lack of datasets to thoroughly evaluate LLMs’ ability to handle ambiguity in Chinese. To bridge this gap, we introduce the CHAmbi dataset, a specialized Chinese multi-label disambiguation dataset formatted in Natural Language Inference. It comprises 4,991 pairs of premises and hypotheses, including 824 examples featuring a wide range of ambiguities. In addition to the dataset, we develop a series of tests and conduct an extensive evaluation of pre-trained LLMs’ proficiency in identifying and resolving ambiguity in the Chinese language. Our findings reveal that GPT-4 consistently delivers commendable performance across various evaluative measures, albeit with limitations in robustness. The performances of other LLMs, however, demonstrate variability in handling ambiguity-related tasks, underscoring the complexity of such tasks in the context of Chinese. The overall results highlight the challenge of ambiguity handling for current LLMs and underscore the imperative need for further enhancement in LLM capabilities for effective ambiguity resolution in the Chinese language.
%U https://aclanthology.org/2024.findings-emnlp.875
%P 14883-14898
Markdown (Informal)
[CHAmbi: A New Benchmark on Chinese Ambiguity Challenges for Large Language Models](https://aclanthology.org/2024.findings-emnlp.875) (Zhang et al., Findings 2024)
ACL