@inproceedings{gajewska-2023-eevvgg,
title = "eevvgg at {S}em{E}val-2023 Task 11: Offensive Language Classification with Rater-based Information",
author = "Gajewska, Ewelina",
editor = {Ojha, Atul Kr. and
Do{\u{g}}ru{\"o}z, A. Seza and
Da San Martino, Giovanni and
Tayyar Madabushi, Harish and
Kumar, Ritesh and
Sartori, Elisa},
booktitle = "Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.semeval-1.24",
doi = "10.18653/v1/2023.semeval-1.24",
pages = "171--176",
abstract = "A standard majority-based approach to text classification is challenged with an individualised approach in the Semeval-2023 Task 11. Here, disagreements are treated as a useful source of information that could be utilised in the training pipeline. The team proposal makes use of partially disaggregated data and additional information about annotators provided by the organisers to train a BERT-based model for offensive text classification. The approach extends previous studies examining the impact of using raters{'} demographic features on classification performance (Hovy, 2015) or training machine learning models on disaggregated data (Davani et al., 2022). The proposed approach was ranked 11 across all 4 datasets, scoring best for cases with a large pool of annotators (6th place in the MD-Agreement dataset) utilising features based on raters{'} annotation behaviour.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gajewska-2023-eevvgg">
<titleInfo>
<title>eevvgg at SemEval-2023 Task 11: Offensive Language Classification with Rater-based Information</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ewelina</namePart>
<namePart type="family">Gajewska</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A.</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giovanni</namePart>
<namePart type="family">Da San Martino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harish</namePart>
<namePart type="family">Tayyar Madabushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ritesh</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elisa</namePart>
<namePart type="family">Sartori</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>A standard majority-based approach to text classification is challenged with an individualised approach in the Semeval-2023 Task 11. Here, disagreements are treated as a useful source of information that could be utilised in the training pipeline. The team proposal makes use of partially disaggregated data and additional information about annotators provided by the organisers to train a BERT-based model for offensive text classification. The approach extends previous studies examining the impact of using raters’ demographic features on classification performance (Hovy, 2015) or training machine learning models on disaggregated data (Davani et al., 2022). The proposed approach was ranked 11 across all 4 datasets, scoring best for cases with a large pool of annotators (6th place in the MD-Agreement dataset) utilising features based on raters’ annotation behaviour.</abstract>
<identifier type="citekey">gajewska-2023-eevvgg</identifier>
<identifier type="doi">10.18653/v1/2023.semeval-1.24</identifier>
<location>
<url>https://aclanthology.org/2023.semeval-1.24</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>171</start>
<end>176</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T eevvgg at SemEval-2023 Task 11: Offensive Language Classification with Rater-based Information
%A Gajewska, Ewelina
%Y Ojha, Atul Kr.
%Y Doğruöz, A. Seza
%Y Da San Martino, Giovanni
%Y Tayyar Madabushi, Harish
%Y Kumar, Ritesh
%Y Sartori, Elisa
%S Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F gajewska-2023-eevvgg
%X A standard majority-based approach to text classification is challenged with an individualised approach in the Semeval-2023 Task 11. Here, disagreements are treated as a useful source of information that could be utilised in the training pipeline. The team proposal makes use of partially disaggregated data and additional information about annotators provided by the organisers to train a BERT-based model for offensive text classification. The approach extends previous studies examining the impact of using raters’ demographic features on classification performance (Hovy, 2015) or training machine learning models on disaggregated data (Davani et al., 2022). The proposed approach was ranked 11 across all 4 datasets, scoring best for cases with a large pool of annotators (6th place in the MD-Agreement dataset) utilising features based on raters’ annotation behaviour.
%R 10.18653/v1/2023.semeval-1.24
%U https://aclanthology.org/2023.semeval-1.24
%U https://doi.org/10.18653/v1/2023.semeval-1.24
%P 171-176
Markdown (Informal)
[eevvgg at SemEval-2023 Task 11: Offensive Language Classification with Rater-based Information](https://aclanthology.org/2023.semeval-1.24) (Gajewska, SemEval 2023)
ACL