@inproceedings{le-van-2025-mmlabuit,
title = "{MML}ab{UIT} at {C}o{M}e{D}i{S}hared Task: Text Embedding Techniques versus Generation-Based {NLI} for Median Judgment Classification",
author = "Le, Tai Duc and
Van, Thin Dang",
editor = "Roth, Michael and
Schlechtweg, Dominik",
booktitle = "Proceedings of Context and Meaning: Navigating Disagreements in NLP Annotation",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2025.comedi-1.11/",
pages = "113--121",
abstract = "This paper presents our approach in the COLING2025-CoMeDi task in 7 languages, focusing on sub-task 1: Median Judgment Classification with Ordinal Word-in-Context Judgments (OGWiC). Specifically, we need to determine the meaning relation of one word in two different contexts and classify the input into 4 labels. To address sub-task 1, we implement and investigate various solutions, including (1) Stacking, Averaged Embedding techniques with a multilingual BERT-based model; and (2) utilizing a Natural Language Inference approach instead of a regular classification process. All the experiments were conducted on the P100 GPU from the Kaggle platform. To enhance the context of input, we perform Improve Known Data Rate and Text Expansion in some languages. For model focusing purposes Custom Token was used in the data processing pipeline. Our best official results on the test set are 0.515, 0.518, and 0.524 in terms of Krippendorff`s {\ensuremath{\alpha}} score on task 1. Our participation system achieved a Top 3 ranking in task 1. Besides the official result, our best approach also achieved 0.596 regarding Krippendorff`s {\ensuremath{\alpha}} score on Task 1."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="le-van-2025-mmlabuit">
<titleInfo>
<title>MMLabUIT at CoMeDiShared Task: Text Embedding Techniques versus Generation-Based NLI for Median Judgment Classification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tai</namePart>
<namePart type="given">Duc</namePart>
<namePart type="family">Le</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thin</namePart>
<namePart type="given">Dang</namePart>
<namePart type="family">Van</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of Context and Meaning: Navigating Disagreements in NLP Annotation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Roth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dominik</namePart>
<namePart type="family">Schlechtweg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents our approach in the COLING2025-CoMeDi task in 7 languages, focusing on sub-task 1: Median Judgment Classification with Ordinal Word-in-Context Judgments (OGWiC). Specifically, we need to determine the meaning relation of one word in two different contexts and classify the input into 4 labels. To address sub-task 1, we implement and investigate various solutions, including (1) Stacking, Averaged Embedding techniques with a multilingual BERT-based model; and (2) utilizing a Natural Language Inference approach instead of a regular classification process. All the experiments were conducted on the P100 GPU from the Kaggle platform. To enhance the context of input, we perform Improve Known Data Rate and Text Expansion in some languages. For model focusing purposes Custom Token was used in the data processing pipeline. Our best official results on the test set are 0.515, 0.518, and 0.524 in terms of Krippendorff‘s \ensuremathα score on task 1. Our participation system achieved a Top 3 ranking in task 1. Besides the official result, our best approach also achieved 0.596 regarding Krippendorff‘s \ensuremathα score on Task 1.</abstract>
<identifier type="citekey">le-van-2025-mmlabuit</identifier>
<location>
<url>https://aclanthology.org/2025.comedi-1.11/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>113</start>
<end>121</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MMLabUIT at CoMeDiShared Task: Text Embedding Techniques versus Generation-Based NLI for Median Judgment Classification
%A Le, Tai Duc
%A Van, Thin Dang
%Y Roth, Michael
%Y Schlechtweg, Dominik
%S Proceedings of Context and Meaning: Navigating Disagreements in NLP Annotation
%D 2025
%8 January
%I International Committee on Computational Linguistics
%C Abu Dhabi, UAE
%F le-van-2025-mmlabuit
%X This paper presents our approach in the COLING2025-CoMeDi task in 7 languages, focusing on sub-task 1: Median Judgment Classification with Ordinal Word-in-Context Judgments (OGWiC). Specifically, we need to determine the meaning relation of one word in two different contexts and classify the input into 4 labels. To address sub-task 1, we implement and investigate various solutions, including (1) Stacking, Averaged Embedding techniques with a multilingual BERT-based model; and (2) utilizing a Natural Language Inference approach instead of a regular classification process. All the experiments were conducted on the P100 GPU from the Kaggle platform. To enhance the context of input, we perform Improve Known Data Rate and Text Expansion in some languages. For model focusing purposes Custom Token was used in the data processing pipeline. Our best official results on the test set are 0.515, 0.518, and 0.524 in terms of Krippendorff‘s \ensuremathα score on task 1. Our participation system achieved a Top 3 ranking in task 1. Besides the official result, our best approach also achieved 0.596 regarding Krippendorff‘s \ensuremathα score on Task 1.
%U https://aclanthology.org/2025.comedi-1.11/
%P 113-121
Markdown (Informal)
[MMLabUIT at CoMeDiShared Task: Text Embedding Techniques versus Generation-Based NLI for Median Judgment Classification](https://aclanthology.org/2025.comedi-1.11/) (Le & Van, CoMeDi 2025)
ACL