@inproceedings{chernyak-2017-comparison,
title = "Comparison of String Similarity Measures for Obscenity Filtering",
author = "Chernyak, Ekaterina",
editor = "Erjavec, Toma{\v{z}} and
Piskorski, Jakub and
Pivovarova, Lidia and
{\v{S}}najder, Jan and
Steinberger, Josef and
Yangarber, Roman",
booktitle = "Proceedings of the 6th Workshop on {B}alto-{S}lavic Natural Language Processing",
month = apr,
year = "2017",
address = "Valencia, Spain",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-1415",
doi = "10.18653/v1/W17-1415",
pages = "97--101",
abstract = "In this paper we address the problem of filtering obscene lexis in Russian texts. We use string similarity measures to find words similar or identical to words from a stop list and establish both a test collection and a baseline for the task. Our experiments show that a novel string similarity measure based on the notion of an annotated suffix tree outperforms some of the other well known measures.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chernyak-2017-comparison">
<titleInfo>
<title>Comparison of String Similarity Measures for Obscenity Filtering</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Chernyak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 6th Workshop on Balto-Slavic Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tomaž</namePart>
<namePart type="family">Erjavec</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jakub</namePart>
<namePart type="family">Piskorski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lidia</namePart>
<namePart type="family">Pivovarova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Šnajder</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Josef</namePart>
<namePart type="family">Steinberger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roman</namePart>
<namePart type="family">Yangarber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Valencia, Spain</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper we address the problem of filtering obscene lexis in Russian texts. We use string similarity measures to find words similar or identical to words from a stop list and establish both a test collection and a baseline for the task. Our experiments show that a novel string similarity measure based on the notion of an annotated suffix tree outperforms some of the other well known measures.</abstract>
<identifier type="citekey">chernyak-2017-comparison</identifier>
<identifier type="doi">10.18653/v1/W17-1415</identifier>
<location>
<url>https://aclanthology.org/W17-1415</url>
</location>
<part>
<date>2017-04</date>
<extent unit="page">
<start>97</start>
<end>101</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Comparison of String Similarity Measures for Obscenity Filtering
%A Chernyak, Ekaterina
%Y Erjavec, Tomaž
%Y Piskorski, Jakub
%Y Pivovarova, Lidia
%Y Šnajder, Jan
%Y Steinberger, Josef
%Y Yangarber, Roman
%S Proceedings of the 6th Workshop on Balto-Slavic Natural Language Processing
%D 2017
%8 April
%I Association for Computational Linguistics
%C Valencia, Spain
%F chernyak-2017-comparison
%X In this paper we address the problem of filtering obscene lexis in Russian texts. We use string similarity measures to find words similar or identical to words from a stop list and establish both a test collection and a baseline for the task. Our experiments show that a novel string similarity measure based on the notion of an annotated suffix tree outperforms some of the other well known measures.
%R 10.18653/v1/W17-1415
%U https://aclanthology.org/W17-1415
%U https://doi.org/10.18653/v1/W17-1415
%P 97-101
Markdown (Informal)
[Comparison of String Similarity Measures for Obscenity Filtering](https://aclanthology.org/W17-1415) (Chernyak, BSNLP 2017)
ACL