@inproceedings{fromknecht-palmer-2020-unt,
title = "{UNT} Linguistics at {S}em{E}val-2020 Task 12: Linear {SVC} with Pre-trained Word Embeddings as Document Vectors and Targeted Linguistic Features",
author = "Fromknecht, Jared and
Palmer, Alexis",
editor = "Herbelot, Aurelie and
Zhu, Xiaodan and
Palmer, Alexis and
Schneider, Nathan and
May, Jonathan and
Shutova, Ekaterina",
booktitle = "Proceedings of the Fourteenth Workshop on Semantic Evaluation",
month = dec,
year = "2020",
address = "Barcelona (online)",
publisher = "International Committee for Computational Linguistics",
url = "https://aclanthology.org/2020.semeval-1.294",
doi = "10.18653/v1/2020.semeval-1.294",
pages = "2209--2215",
abstract = "This paper outlines our approach to Tasks A {\&} B for the English Language track of SemEval-2020 Task 12: OffensEval 2: Multilingual Offensive Language Identification in Social Media. We use a Linear SVM with document vectors computed from pre-trained word embeddings, and we explore the effectiveness of lexical, part of speech, dependency, and named entity (NE) features. We manually annotate a subset of the training data, which we use for error analysis and to tune a threshold for mapping training confidence values to labels. While document vectors are consistently the most informative features for both tasks, testing on the development set suggests that dependency features are an effective addition for Task A, and NE features for Task B.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fromknecht-palmer-2020-unt">
<titleInfo>
<title>UNT Linguistics at SemEval-2020 Task 12: Linear SVC with Pre-trained Word Embeddings as Document Vectors and Targeted Linguistic Features</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jared</namePart>
<namePart type="family">Fromknecht</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexis</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourteenth Workshop on Semantic Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aurelie</namePart>
<namePart type="family">Herbelot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaodan</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexis</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nathan</namePart>
<namePart type="family">Schneider</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="family">May</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona (online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper outlines our approach to Tasks A & B for the English Language track of SemEval-2020 Task 12: OffensEval 2: Multilingual Offensive Language Identification in Social Media. We use a Linear SVM with document vectors computed from pre-trained word embeddings, and we explore the effectiveness of lexical, part of speech, dependency, and named entity (NE) features. We manually annotate a subset of the training data, which we use for error analysis and to tune a threshold for mapping training confidence values to labels. While document vectors are consistently the most informative features for both tasks, testing on the development set suggests that dependency features are an effective addition for Task A, and NE features for Task B.</abstract>
<identifier type="citekey">fromknecht-palmer-2020-unt</identifier>
<identifier type="doi">10.18653/v1/2020.semeval-1.294</identifier>
<location>
<url>https://aclanthology.org/2020.semeval-1.294</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>2209</start>
<end>2215</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T UNT Linguistics at SemEval-2020 Task 12: Linear SVC with Pre-trained Word Embeddings as Document Vectors and Targeted Linguistic Features
%A Fromknecht, Jared
%A Palmer, Alexis
%Y Herbelot, Aurelie
%Y Zhu, Xiaodan
%Y Palmer, Alexis
%Y Schneider, Nathan
%Y May, Jonathan
%Y Shutova, Ekaterina
%S Proceedings of the Fourteenth Workshop on Semantic Evaluation
%D 2020
%8 December
%I International Committee for Computational Linguistics
%C Barcelona (online)
%F fromknecht-palmer-2020-unt
%X This paper outlines our approach to Tasks A & B for the English Language track of SemEval-2020 Task 12: OffensEval 2: Multilingual Offensive Language Identification in Social Media. We use a Linear SVM with document vectors computed from pre-trained word embeddings, and we explore the effectiveness of lexical, part of speech, dependency, and named entity (NE) features. We manually annotate a subset of the training data, which we use for error analysis and to tune a threshold for mapping training confidence values to labels. While document vectors are consistently the most informative features for both tasks, testing on the development set suggests that dependency features are an effective addition for Task A, and NE features for Task B.
%R 10.18653/v1/2020.semeval-1.294
%U https://aclanthology.org/2020.semeval-1.294
%U https://doi.org/10.18653/v1/2020.semeval-1.294
%P 2209-2215
Markdown (Informal)
[UNT Linguistics at SemEval-2020 Task 12: Linear SVC with Pre-trained Word Embeddings as Document Vectors and Targeted Linguistic Features](https://aclanthology.org/2020.semeval-1.294) (Fromknecht & Palmer, SemEval 2020)
ACL