% SemEval-2023 Task 10 system description paper by the DUTIR team.
% Case-sensitive words in the title are protected with whole-word braces so
% that sentence-casing styles keep "DUTIR", "SemEval" and "English" intact.
@inproceedings{yu-etal-2023-dutir,
  title     = {{DUTIR} at {SemEval}-2023 Task 10: Semi-supervised Learning for Sexism Detection in {English}},
  author    = {Yu, Bingjie and
               Bai, Zewen and
               Ji, Haoran and
               Li, Shiyi and
               Zhang, Hao and
               Lin, Hongfei},
  editor    = {Ojha, Atul Kr. and
               Do{\u{g}}ru{\"o}z, A. Seza and
               Da San Martino, Giovanni and
               Tayyar Madabushi, Harish and
               Kumar, Ritesh and
               Sartori, Elisa},
  booktitle = {Proceedings of the 17th International Workshop on Semantic Evaluation ({SemEval}-2023)},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.semeval-1.123},
  doi       = {10.18653/v1/2023.semeval-1.123},
  pages     = {892--896},
  abstract  = {Sexism is an injustice afflicting women and has become a common form of oppression in social media. In recent years, the automatic detection of sexist instances has been utilized to combat this oppression. The Subtask A of SemEval-2023 Task 10, Explainable Detection of Online Sexism, aims to detect whether an English-language post is sexist. In this paper, we describe our system for the competition. The structure of the classification model is based on RoBERTa, and we further pre-train it on the domain corpus. For fine-tuning, we adopt Unsupervised Data Augmentation (UDA), a semi-supervised learning approach, to improve the robustness of the system. Specifically, we employ Easy Data Augmentation (EDA) method as the noising operation for consistency training. We train multiple models based on different hyperparameter settings and adopt the majority voting method to predict the labels of test entries. Our proposed system achieves a Macro-F1 score of 0.8352 and a ranking of 41/84 on the leaderboard of Subtask A.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record: the paper with citekey yu-etal-2023-dutir. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="yu-etal-2023-dutir">
    <!-- Title of the paper itself. -->
    <titleInfo>
      <title>DUTIR at SemEval-2023 Task 10: Semi-supervised Learning for Sexism Detection in English</title>
    </titleInfo>
    <!-- Authors of the paper, one <name> element per person. -->
    <name type="personal">
      <namePart type="given">Bingjie</namePart>
      <namePart type="family">Yu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zewen</namePart>
      <namePart type="family">Bai</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Haoran</namePart>
      <namePart type="family">Ji</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shiyi</namePart>
      <namePart type="family">Li</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hao</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hongfei</namePart>
      <namePart type="family">Lin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume that contains the paper, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Atul</namePart>
        <namePart type="given">Kr.</namePart>
        <namePart type="family">Ojha</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">A</namePart>
        <namePart type="given">Seza</namePart>
        <namePart type="family">Doğruöz</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Giovanni</namePart>
        <namePart type="family">Da San Martino</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Harish</namePart>
        <namePart type="family">Tayyar Madabushi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ritesh</namePart>
        <namePart type="family">Kumar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Elisa</namePart>
        <namePart type="family">Sartori</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Toronto, Canada</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Sexism is an injustice afflicting women and has become a common form of oppression in social media. In recent years, the automatic detection of sexist instances has been utilized to combat this oppression. The Subtask A of SemEval-2023 Task 10, Explainable Detection of Online Sexism, aims to detect whether an English-language post is sexist. In this paper, we describe our system for the competition. The structure of the classification model is based on RoBERTa, and we further pre-train it on the domain corpus. For fine-tuning, we adopt Unsupervised Data Augmentation (UDA), a semi-supervised learning approach, to improve the robustness of the system. Specifically, we employ Easy Data Augmentation (EDA) method as the noising operation for consistency training. We train multiple models based on different hyperparameter settings and adopt the majority voting method to predict the labels of test entries. Our proposed system achieves a Macro-F1 score of 0.8352 and a ranking of 41/84 on the leaderboard of Subtask A.</abstract>
    <!-- Identifiers and access location for the paper. -->
    <identifier type="citekey">yu-etal-2023-dutir</identifier>
    <identifier type="doi">10.18653/v1/2023.semeval-1.123</identifier>
    <location>
      <url>https://aclanthology.org/2023.semeval-1.123</url>
    </location>
    <!-- Page range of the paper within the host volume. -->
    <part>
      <date>2023-07</date>
      <extent unit="page">
        <start>892</start>
        <end>896</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T DUTIR at SemEval-2023 Task 10: Semi-supervised Learning for Sexism Detection in English
%A Yu, Bingjie
%A Bai, Zewen
%A Ji, Haoran
%A Li, Shiyi
%A Zhang, Hao
%A Lin, Hongfei
%Y Ojha, Atul Kr.
%Y Doğruöz, A. Seza
%Y Da San Martino, Giovanni
%Y Tayyar Madabushi, Harish
%Y Kumar, Ritesh
%Y Sartori, Elisa
%S Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F yu-etal-2023-dutir
%X Sexism is an injustice afflicting women and has become a common form of oppression in social media. In recent years, the automatic detection of sexist instances has been utilized to combat this oppression. The Subtask A of SemEval-2023 Task 10, Explainable Detection of Online Sexism, aims to detect whether an English-language post is sexist. In this paper, we describe our system for the competition. The structure of the classification model is based on RoBERTa, and we further pre-train it on the domain corpus. For fine-tuning, we adopt Unsupervised Data Augmentation (UDA), a semi-supervised learning approach, to improve the robustness of the system. Specifically, we employ Easy Data Augmentation (EDA) method as the noising operation for consistency training. We train multiple models based on different hyperparameter settings and adopt the majority voting method to predict the labels of test entries. Our proposed system achieves a Macro-F1 score of 0.8352 and a ranking of 41/84 on the leaderboard of Subtask A.
%R 10.18653/v1/2023.semeval-1.123
%U https://aclanthology.org/2023.semeval-1.123
%U https://doi.org/10.18653/v1/2023.semeval-1.123
%P 892-896
Markdown (Informal)
[DUTIR at SemEval-2023 Task 10: Semi-supervised Learning for Sexism Detection in English](https://aclanthology.org/2023.semeval-1.123) (Yu et al., SemEval 2023)
ACL