@inproceedings{chen-etal-2023-sea,
title = "{S}ea{\_}and{\_}{W}ine at {S}em{E}val-2023 Task 9: A Regression Model with Data Augmentation for Multilingual Intimacy Analysis",
author = "Chen, Yuxi and
Chang, Yu and
Tao, Yanqing and
Zhang, Yanru",
editor = {Ojha, Atul Kr. and
Do{\u{g}}ru{\"o}z, A. Seza and
Da San Martino, Giovanni and
Tayyar Madabushi, Harish and
Kumar, Ritesh and
Sartori, Elisa},
booktitle = "Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.semeval-1.9",
doi = "10.18653/v1/2023.semeval-1.9",
pages = "77--82",
abstract = "In Task 9, we are required to analyze the textual intimacy of tweets in 10 languages. We fine-tune XLM-RoBERTa (XLM-R) pre-trained model to adapt to this multilingual regression task. After tentative experiments, severe class imbalance is observed in the official released dataset, which may compromise the convergence and weaken the model effect. To tackle such challenge, we take measures in two aspects. On the one hand, we implement data augmentation through machine translation to enlarge the scale of classes with fewer samples. On the other hand, we introduce focal mean square error (MSE) loss to emphasize the contributions of hard samples to total loss, thus further mitigating the impact of class imbalance on model effect. Extensive experiments demonstrate remarkable effectiveness of our strategies, and our model achieves high performance on the Pearson{'}s correlation coefficient (CC) almost above 0.85 on validation dataset.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chen-etal-2023-sea">
<titleInfo>
<title>Sea_and_Wine at SemEval-2023 Task 9: A Regression Model with Data Augmentation for Multilingual Intimacy Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yuxi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yanqing</namePart>
<namePart type="family">Tao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yanru</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giovanni</namePart>
<namePart type="family">Da San Martino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harish</namePart>
<namePart type="family">Tayyar Madabushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ritesh</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elisa</namePart>
<namePart type="family">Sartori</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In Task 9, we are required to analyze the textual intimacy of tweets in 10 languages. We fine-tune XLM-RoBERTa (XLM-R) pre-trained model to adapt to this multilingual regression task. After tentative experiments, severe class imbalance is observed in the official released dataset, which may compromise the convergence and weaken the model effect. To tackle such challenge, we take measures in two aspects. On the one hand, we implement data augmentation through machine translation to enlarge the scale of classes with fewer samples. On the other hand, we introduce focal mean square error (MSE) loss to emphasize the contributions of hard samples to total loss, thus further mitigating the impact of class imbalance on model effect. Extensive experiments demonstrate remarkable effectiveness of our strategies, and our model achieves high performance on the Pearson’s correlation coefficient (CC) almost above 0.85 on validation dataset.</abstract>
<identifier type="citekey">chen-etal-2023-sea</identifier>
<identifier type="doi">10.18653/v1/2023.semeval-1.9</identifier>
<location>
<url>https://aclanthology.org/2023.semeval-1.9</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>77</start>
<end>82</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Sea_and_Wine at SemEval-2023 Task 9: A Regression Model with Data Augmentation for Multilingual Intimacy Analysis
%A Chen, Yuxi
%A Chang, Yu
%A Tao, Yanqing
%A Zhang, Yanru
%Y Ojha, Atul Kr.
%Y Doğruöz, A. Seza
%Y Da San Martino, Giovanni
%Y Tayyar Madabushi, Harish
%Y Kumar, Ritesh
%Y Sartori, Elisa
%S Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F chen-etal-2023-sea
%X In Task 9, we are required to analyze the textual intimacy of tweets in 10 languages. We fine-tune XLM-RoBERTa (XLM-R) pre-trained model to adapt to this multilingual regression task. After tentative experiments, severe class imbalance is observed in the official released dataset, which may compromise the convergence and weaken the model effect. To tackle such challenge, we take measures in two aspects. On the one hand, we implement data augmentation through machine translation to enlarge the scale of classes with fewer samples. On the other hand, we introduce focal mean square error (MSE) loss to emphasize the contributions of hard samples to total loss, thus further mitigating the impact of class imbalance on model effect. Extensive experiments demonstrate remarkable effectiveness of our strategies, and our model achieves high performance on the Pearson’s correlation coefficient (CC) almost above 0.85 on validation dataset.
%R 10.18653/v1/2023.semeval-1.9
%U https://aclanthology.org/2023.semeval-1.9
%U https://doi.org/10.18653/v1/2023.semeval-1.9
%P 77-82
Markdown (Informal)
[Sea_and_Wine at SemEval-2023 Task 9: A Regression Model with Data Augmentation for Multilingual Intimacy Analysis](https://aclanthology.org/2023.semeval-1.9) (Chen et al., SemEval 2023)
ACL