@inproceedings{hildebrandt-etal-2021-data,
title = "Data Science Kitchen at {G}erm{E}val 2021: A Fine Selection of Hand-Picked Features, Delivered Fresh from the Oven",
author = "Hildebrandt, Niclas and
Boenninghoff, Benedikt and
Orth, Dennis and
Schymura, Christopher",
editor = "Risch, Julian and
Stoll, Anke and
Wilms, Lena and
Wiegand, Michael",
booktitle = "Proceedings of the GermEval 2021 Shared Task on the Identification of Toxic, Engaging, and Fact-Claiming Comments",
month = sep,
year = "2021",
address = "Duesseldorf, Germany",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.germeval-1.13",
pages = "88--94",
abstract = "This paper presents the contribution of the Data Science Kitchen at GermEval 2021 shared task on the identification of toxic, engaging, and fact-claiming comments. The task aims at extending the identification of offensive language, by including additional subtasks that identify comments which should be prioritized for fact-checking by moderators and community managers. Our contribution focuses on a feature-engineering approach with a conventional classification backend. We combine semantic and writing style embeddings derived from pre-trained deep neural networks with additional numerical features, specifically designed for this task. Ensembles of Logistic Regression classifiers and Support Vector Machines are used to derive predictions for each subtask via a majority voting scheme. Our best submission achieved macro-averaged F1-scores of 66.8{\%}, 69.9{\%} and 72.5{\%} for the identification of toxic, engaging, and fact-claiming comments.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hildebrandt-etal-2021-data">
<titleInfo>
<title>Data Science Kitchen at GermEval 2021: A Fine Selection of Hand-Picked Features, Delivered Fresh from the Oven</title>
</titleInfo>
<name type="personal">
<namePart type="given">Niclas</namePart>
<namePart type="family">Hildebrandt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benedikt</namePart>
<namePart type="family">Boenninghoff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dennis</namePart>
<namePart type="family">Orth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Schymura</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the GermEval 2021 Shared Task on the Identification of Toxic, Engaging, and Fact-Claiming Comments</title>
</titleInfo>
<name type="personal">
<namePart type="given">Julian</namePart>
<namePart type="family">Risch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anke</namePart>
<namePart type="family">Stoll</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lena</namePart>
<namePart type="family">Wilms</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Wiegand</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Duesseldorf, Germany</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents the contribution of the Data Science Kitchen at GermEval 2021 shared task on the identification of toxic, engaging, and fact-claiming comments. The task aims at extending the identification of offensive language, by including additional subtasks that identify comments which should be prioritized for fact-checking by moderators and community managers. Our contribution focuses on a feature-engineering approach with a conventional classification backend. We combine semantic and writing style embeddings derived from pre-trained deep neural networks with additional numerical features, specifically designed for this task. Ensembles of Logistic Regression classifiers and Support Vector Machines are used to derive predictions for each subtask via a majority voting scheme. Our best submission achieved macro-averaged F1-scores of 66.8%, 69.9% and 72.5% for the identification of toxic, engaging, and fact-claiming comments.</abstract>
<identifier type="citekey">hildebrandt-etal-2021-data</identifier>
<location>
<url>https://aclanthology.org/2021.germeval-1.13</url>
</location>
<part>
<date>2021-09</date>
<extent unit="page">
<start>88</start>
<end>94</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Data Science Kitchen at GermEval 2021: A Fine Selection of Hand-Picked Features, Delivered Fresh from the Oven
%A Hildebrandt, Niclas
%A Boenninghoff, Benedikt
%A Orth, Dennis
%A Schymura, Christopher
%Y Risch, Julian
%Y Stoll, Anke
%Y Wilms, Lena
%Y Wiegand, Michael
%S Proceedings of the GermEval 2021 Shared Task on the Identification of Toxic, Engaging, and Fact-Claiming Comments
%D 2021
%8 September
%I Association for Computational Linguistics
%C Duesseldorf, Germany
%F hildebrandt-etal-2021-data
%X This paper presents the contribution of the Data Science Kitchen at GermEval 2021 shared task on the identification of toxic, engaging, and fact-claiming comments. The task aims at extending the identification of offensive language, by including additional subtasks that identify comments which should be prioritized for fact-checking by moderators and community managers. Our contribution focuses on a feature-engineering approach with a conventional classification backend. We combine semantic and writing style embeddings derived from pre-trained deep neural networks with additional numerical features, specifically designed for this task. Ensembles of Logistic Regression classifiers and Support Vector Machines are used to derive predictions for each subtask via a majority voting scheme. Our best submission achieved macro-averaged F1-scores of 66.8%, 69.9% and 72.5% for the identification of toxic, engaging, and fact-claiming comments.
%U https://aclanthology.org/2021.germeval-1.13
%P 88-94
Markdown (Informal)
[Data Science Kitchen at GermEval 2021: A Fine Selection of Hand-Picked Features, Delivered Fresh from the Oven](https://aclanthology.org/2021.germeval-1.13) (Hildebrandt et al., GermEval 2021)
ACL