@inproceedings{eder-etal-2019-lower,
title = "At the Lower End of {L}anguage{---}{E}xploring the Vulgar and Obscene Side of {G}erman",
author = "Eder, Elisabeth and
Krieg-Holz, Ulrike and
Hahn, Udo",
editor = "Roberts, Sarah T. and
Tetreault, Joel and
Prabhakaran, Vinodkumar and
Waseem, Zeerak",
booktitle = "Proceedings of the Third Workshop on Abusive Language Online",
month = aug,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-3513",
doi = "10.18653/v1/W19-3513",
pages = "119--128",
abstract = "In this paper, we describe a workflow for the data-driven acquisition and semantic scaling of a lexicon that covers lexical items from the lower end of the German language register{---}terms typically considered as rough, vulgar or obscene. Since the fine semantic representation of grades of obscenity can only inadequately be captured at the categorical level (e.g., obscene vs. non-obscene, or rough vs. vulgar), our main contribution lies in applying best-worst scaling, a rating methodology that has already been shown to be useful for emotional language, to capture the relative strength of obscenity of lexical items. We describe the empirical foundations for bootstrapping such a low-end lexicon for German by starting from manually supplied lexicographic categorizations of a small seed set of rough and vulgar lexical items and automatically enlarging this set by means of distributional semantics. We then determine the degrees of obscenity for the full set of all acquired lexical items by letting crowdworkers comparatively assess their pejorative grade using best-worst scaling. This semi-automatically enriched lexicon already comprises 3,300 lexical items and incorporates 33,000 vulgarity ratings. Using it as a seed lexicon for fully automatic lexical acquisition, we were able to raise its coverage up to slightly more than 11,000 entries.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="eder-etal-2019-lower">
<titleInfo>
<title>At the Lower End of Language—Exploring the Vulgar and Obscene Side of German</title>
</titleInfo>
<name type="personal">
<namePart type="given">Elisabeth</namePart>
<namePart type="family">Eder</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ulrike</namePart>
<namePart type="family">Krieg-Holz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Udo</namePart>
<namePart type="family">Hahn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on Abusive Language Online</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sarah</namePart>
<namePart type="given">T</namePart>
<namePart type="family">Roberts</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joel</namePart>
<namePart type="family">Tetreault</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vinodkumar</namePart>
<namePart type="family">Prabhakaran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zeerak</namePart>
<namePart type="family">Waseem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we describe a workflow for the data-driven acquisition and semantic scaling of a lexicon that covers lexical items from the lower end of the German language register—terms typically considered as rough, vulgar or obscene. Since the fine semantic representation of grades of obscenity can only inadequately be captured at the categorical level (e.g., obscene vs. non-obscene, or rough vs. vulgar), our main contribution lies in applying best-worst scaling, a rating methodology that has already been shown to be useful for emotional language, to capture the relative strength of obscenity of lexical items. We describe the empirical foundations for bootstrapping such a low-end lexicon for German by starting from manually supplied lexicographic categorizations of a small seed set of rough and vulgar lexical items and automatically enlarging this set by means of distributional semantics. We then determine the degrees of obscenity for the full set of all acquired lexical items by letting crowdworkers comparatively assess their pejorative grade using best-worst scaling. This semi-automatically enriched lexicon already comprises 3,300 lexical items and incorporates 33,000 vulgarity ratings. Using it as a seed lexicon for fully automatic lexical acquisition, we were able to raise its coverage up to slightly more than 11,000 entries.</abstract>
<identifier type="citekey">eder-etal-2019-lower</identifier>
<identifier type="doi">10.18653/v1/W19-3513</identifier>
<location>
<url>https://aclanthology.org/W19-3513</url>
</location>
<part>
<date>2019-08</date>
<extent unit="page">
<start>119</start>
<end>128</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T At the Lower End of Language—Exploring the Vulgar and Obscene Side of German
%A Eder, Elisabeth
%A Krieg-Holz, Ulrike
%A Hahn, Udo
%Y Roberts, Sarah T.
%Y Tetreault, Joel
%Y Prabhakaran, Vinodkumar
%Y Waseem, Zeerak
%S Proceedings of the Third Workshop on Abusive Language Online
%D 2019
%8 August
%I Association for Computational Linguistics
%C Florence, Italy
%F eder-etal-2019-lower
%X In this paper, we describe a workflow for the data-driven acquisition and semantic scaling of a lexicon that covers lexical items from the lower end of the German language register—terms typically considered as rough, vulgar or obscene. Since the fine semantic representation of grades of obscenity can only inadequately be captured at the categorical level (e.g., obscene vs. non-obscene, or rough vs. vulgar), our main contribution lies in applying best-worst scaling, a rating methodology that has already been shown to be useful for emotional language, to capture the relative strength of obscenity of lexical items. We describe the empirical foundations for bootstrapping such a low-end lexicon for German by starting from manually supplied lexicographic categorizations of a small seed set of rough and vulgar lexical items and automatically enlarging this set by means of distributional semantics. We then determine the degrees of obscenity for the full set of all acquired lexical items by letting crowdworkers comparatively assess their pejorative grade using best-worst scaling. This semi-automatically enriched lexicon already comprises 3,300 lexical items and incorporates 33,000 vulgarity ratings. Using it as a seed lexicon for fully automatic lexical acquisition, we were able to raise its coverage up to slightly more than 11,000 entries.
%R 10.18653/v1/W19-3513
%U https://aclanthology.org/W19-3513
%U https://doi.org/10.18653/v1/W19-3513
%P 119-128
Markdown (Informal)
[At the Lower End of Language—Exploring the Vulgar and Obscene Side of German](https://aclanthology.org/W19-3513) (Eder et al., ALW 2019)
ACL