@inproceedings{loke-etal-2025-abdn,
title = "{ABDN}-{NLP} at {C}o{M}e{D}i Shared Task: Predicting the Aggregated Human Judgment via Weighted Few-Shot Prompting",
author = "Loke, Ying Xuan and
Schlechtweg, Dominik and
Zhao, Wei",
editor = "Roth, Michael and
Schlechtweg, Dominik",
booktitle = "Proceedings of Context and Meaning: Navigating Disagreements in NLP Annotation",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2025.comedi-1.12/",
pages = "122--128",
abstract = "Human annotation is notorious for being subjective and expensive. The recently introduced CoMeDi shared task aims to address this issue by predicting human annotations on the semantic proximity between word uses and estimating the variation of the human annotations. However, distinguishing the proximity between word uses can be challenging when their semantic difference is subtle. In this work, we focus on predicting the aggregated annotator judgment of semantic proximity by using a large language model fine-tuned on 20 examples with various proximity classes. To distinguish nuanced proximity, we propose a weighted few-shot approach that pays greater attention to the proximity classes identified as important during fine-tuning. We evaluate our approach in the CoMeDi shared task across 7 languages. Our results demonstrate the superiority of our approach over zero-shot and standard few-shot counterparts. While useful, the weighted few-shot approach should be applied with caution, given that it relies on development sets to compute the importance of proximity classes, and thus may not generalize well to real-world scenarios where the distribution of class importance is different."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="loke-etal-2025-abdn">
<titleInfo>
<title>ABDN-NLP at CoMeDi Shared Task: Predicting the Aggregated Human Judgment via Weighted Few-Shot Prompting</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ying</namePart>
<namePart type="given">Xuan</namePart>
<namePart type="family">Loke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dominik</namePart>
<namePart type="family">Schlechtweg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of Context and Meaning: Navigating Disagreements in NLP Annotation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Roth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dominik</namePart>
<namePart type="family">Schlechtweg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Human annotation is notorious for being subjective and expensive. The recently introduced CoMeDi shared task aims to address this issue by predicting human annotations on the semantic proximity between word uses and estimating the variation of the human annotations. However, distinguishing the proximity between word uses can be challenging when their semantic difference is subtle. In this work, we focus on predicting the aggregated annotator judgment of semantic proximity by using a large language model fine-tuned on 20 examples with various proximity classes. To distinguish nuanced proximity, we propose a weighted few-shot approach that pays greater attention to the proximity classes identified as important during fine-tuning. We evaluate our approach in the CoMeDi shared task across 7 languages. Our results demonstrate the superiority of our approach over zero-shot and standard few-shot counterparts. While useful, the weighted few-shot approach should be applied with caution, given that it relies on development sets to compute the importance of proximity classes, and thus may not generalize well to real-world scenarios where the distribution of class importance is different.</abstract>
<identifier type="citekey">loke-etal-2025-abdn</identifier>
<location>
<url>https://aclanthology.org/2025.comedi-1.12/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>122</start>
<end>128</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ABDN-NLP at CoMeDi Shared Task: Predicting the Aggregated Human Judgment via Weighted Few-Shot Prompting
%A Loke, Ying Xuan
%A Schlechtweg, Dominik
%A Zhao, Wei
%Y Roth, Michael
%Y Schlechtweg, Dominik
%S Proceedings of Context and Meaning: Navigating Disagreements in NLP Annotation
%D 2025
%8 January
%I International Committee on Computational Linguistics
%C Abu Dhabi, UAE
%F loke-etal-2025-abdn
%X Human annotation is notorious for being subjective and expensive. The recently introduced CoMeDi shared task aims to address this issue by predicting human annotations on the semantic proximity between word uses and estimating the variation of the human annotations. However, distinguishing the proximity between word uses can be challenging when their semantic difference is subtle. In this work, we focus on predicting the aggregated annotator judgment of semantic proximity by using a large language model fine-tuned on 20 examples with various proximity classes. To distinguish nuanced proximity, we propose a weighted few-shot approach that pays greater attention to the proximity classes identified as important during fine-tuning. We evaluate our approach in the CoMeDi shared task across 7 languages. Our results demonstrate the superiority of our approach over zero-shot and standard few-shot counterparts. While useful, the weighted few-shot approach should be applied with caution, given that it relies on development sets to compute the importance of proximity classes, and thus may not generalize well to real-world scenarios where the distribution of class importance is different.
%U https://aclanthology.org/2025.comedi-1.12/
%P 122-128
Markdown (Informal)
[ABDN-NLP at CoMeDi Shared Task: Predicting the Aggregated Human Judgment via Weighted Few-Shot Prompting](https://aclanthology.org/2025.comedi-1.12/) (Loke et al., CoMeDi 2025)
ACL