@inproceedings{francis-moens-2024-kul,
title = "{KUL}@{SMM}4{H}2024: Optimizing Text Classification with Quality-Assured Augmentation Strategies",
author = "Francis, Sumam and
Moens, Marie-Francine",
editor = "Xu, Dongfang and
Gonzalez-Hernandez, Graciela",
booktitle = "Proceedings of The 9th Social Media Mining for Health Research and Applications (SMM4H 2024) Workshop and Shared Tasks",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.smm4h-1.33",
pages = "142--145",
abstract = "This paper presents our models for the Social Media Mining for Health 2024 shared task, specifically Task 5, which involves classifying tweets reporting a child with childhood disorders (annotated as {``}1{''}) versus those merely mentioning a disorder (annotated as {``}0{''}). We utilized a classification model enhanced with diverse textual and language model-based augmentations. To ensure quality, we used semantic similarity, perplexity, and lexical diversity as evaluation metrics. Combining supervised contrastive learning and cross-entropy-based learning, our best model, incorporating R-drop and various LM generation-based augmentations, achieved an impressive F1 score of 0.9230 on the test set, surpassing the task mean and median scores.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="francis-moens-2024-kul">
<titleInfo>
<title>KUL@SMM4H2024: Optimizing Text Classification with Quality-Assured Augmentation Strategies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sumam</namePart>
<namePart type="family">Francis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie-Francine</namePart>
<namePart type="family">Moens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of The 9th Social Media Mining for Health Research and Applications (SMM4H 2024) Workshop and Shared Tasks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dongfang</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Graciela</namePart>
<namePart type="family">Gonzalez-Hernandez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents our models for the Social Media Mining for Health 2024 shared task, specifically Task 5, which involves classifying tweets reporting a child with childhood disorders (annotated as “1”) versus those merely mentioning a disorder (annotated as “0”). We utilized a classification model enhanced with diverse textual and language model-based augmentations. To ensure quality, we used semantic similarity, perplexity, and lexical diversity as evaluation metrics. Combining supervised contrastive learning and cross-entropy-based learning, our best model, incorporating R-drop and various LM generation-based augmentations, achieved an impressive F1 score of 0.9230 on the test set, surpassing the task mean and median scores.</abstract>
<identifier type="citekey">francis-moens-2024-kul</identifier>
<location>
<url>https://aclanthology.org/2024.smm4h-1.33</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>142</start>
<end>145</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T KUL@SMM4H2024: Optimizing Text Classification with Quality-Assured Augmentation Strategies
%A Francis, Sumam
%A Moens, Marie-Francine
%Y Xu, Dongfang
%Y Gonzalez-Hernandez, Graciela
%S Proceedings of The 9th Social Media Mining for Health Research and Applications (SMM4H 2024) Workshop and Shared Tasks
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F francis-moens-2024-kul
%X This paper presents our models for the Social Media Mining for Health 2024 shared task, specifically Task 5, which involves classifying tweets reporting a child with childhood disorders (annotated as “1”) versus those merely mentioning a disorder (annotated as “0”). We utilized a classification model enhanced with diverse textual and language model-based augmentations. To ensure quality, we used semantic similarity, perplexity, and lexical diversity as evaluation metrics. Combining supervised contrastive learning and cross-entropy-based learning, our best model, incorporating R-drop and various LM generation-based augmentations, achieved an impressive F1 score of 0.9230 on the test set, surpassing the task mean and median scores.
%U https://aclanthology.org/2024.smm4h-1.33
%P 142-145
Markdown (Informal)
[KUL@SMM4H2024: Optimizing Text Classification with Quality-Assured Augmentation Strategies](https://aclanthology.org/2024.smm4h-1.33) (Francis & Moens, SMM4H-WS 2024)
ACL