% ACL Anthology record for a workshop paper (see the url field).
% Title words whose capitalisation must survive sentence-casing styles are
% brace-protected as whole words.
@inproceedings{sathvik-etal-2026-mapping,
    title     = {Mapping the Landscape of Unregulated {eXplicit} Contents on {Reddit}},
    author    = {Sathvik, Msvpj and
                 Choudhury, Manan Roy and
                 Agarwal, Rishita and
                 Narkedimilli, Sathwik and
                 Ha, Thao and
                 Sharabi, Liesel and
                 Gupta, Vivek},
    editor    = {Card, Dallas and
                 Field, Anjalie and
                 Keith, Katherine and
                 Mendelsohn, Julia},
    booktitle = {Proceedings of the Seventh Workshop on Natural Language Processing and Computational Social Science},
    month     = jul,
    year      = {2026},
    address   = {San Diego},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.nlpcss-1.16/},
    pages     = {271--292},
    isbn      = {979-8-89176-426-2},
    abstract  = {The rise of online platforms has facilitated covert forms of explicit content, which pose significant challenges for detection and regulation. Often using coded language to bypass moderation, this content erodes user trust and may be associated with scam-related risks, posing direct financial and personal risks. In this study, we map the landscape of online explicit content posts, focusing on their categorization, linguistic strategies, and temporal and behavioral patterns as they appear within mainstream platform Reddit. We investigated five distinct content categories including Virtual Services (VS), Physical Services (PS), Exhibitionism (Ex), Couples and Group Interactions (CGI), and Content Creation and Sales (CCS) and performed large-scale experimentation using state-of-the-art large language models (LLMs) such as GPT-4, LLaMA 3.3-70B-Instruct, Gemini 1.5 Flash, Mistral 8{\texttimes}7B, Qwen 2.5 Turbo, and Claude 3.5 Haiku. Our work demonstrates that a nuanced classification of these services requires moving beyond simple keywords, and we establish that expressive signals such as sentiment, emotion, and tone are critical features for accurate detection. Our analysis reveals the distinct behavioral and psychosocial expression patterns that characterize each service category, providing a robust framework for future moderation.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sathvik-etal-2026-mapping">
<titleInfo>
<title>Mapping the Landscape of Unregulated eXplicit Contents on Reddit</title>
</titleInfo>
<name type="personal">
<namePart type="given">Msvpj</namePart>
<namePart type="family">Sathvik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manan</namePart>
<namePart type="given">Roy</namePart>
<namePart type="family">Choudhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rishita</namePart>
<namePart type="family">Agarwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sathwik</namePart>
<namePart type="family">Narkedimilli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thao</namePart>
<namePart type="family">Ha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liesel</namePart>
<namePart type="family">Sharabi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Gupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh Workshop on Natural Language Processing and Computational Social Science</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dallas</namePart>
<namePart type="family">Card</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anjalie</namePart>
<namePart type="family">Field</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katherine</namePart>
<namePart type="family">Keith</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Mendelsohn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-426-2</identifier>
</relatedItem>
<abstract>The rise of online platforms has facilitated covert forms of explicit content, which pose significant challenges for detection and regulation. Often using coded language to bypass moderation, this content erodes user trust and may be associated with scam-related risks, posing direct financial and personal risks. In this study, we map the landscape of online explicit content posts, focusing on their categorization, linguistic strategies, and temporal and behavioral patterns as they appear within mainstream platform Reddit. We investigated five distinct content categories including Virtual Services (VS), Physical Services (PS), Exhibitionism (Ex), Couples and Group Interactions (CGI), and Content Creation and Sales (CCS) and performed large-scale experimentation using state-of-the-art large language models (LLMs) such as GPT-4, LLaMA 3.3-70B-Instruct, Gemini 1.5 Flash, Mistral 8×7B, Qwen 2.5 Turbo, and Claude 3.5 Haiku. Our work demonstrates that a nuanced classification of these services requires moving beyond simple keywords, and we establish that expressive signals such as sentiment, emotion, and tone are critical features for accurate detection. Our analysis reveals the distinct behavioral and psychosocial expression patterns that characterize each service category, providing a robust framework for future moderation.</abstract>
<identifier type="citekey">sathvik-etal-2026-mapping</identifier>
<location>
<url>https://aclanthology.org/2026.nlpcss-1.16/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>271</start>
<end>292</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Mapping the Landscape of Unregulated eXplicit Contents on Reddit
%A Sathvik, Msvpj
%A Choudhury, Manan Roy
%A Agarwal, Rishita
%A Narkedimilli, Sathwik
%A Ha, Thao
%A Sharabi, Liesel
%A Gupta, Vivek
%Y Card, Dallas
%Y Field, Anjalie
%Y Keith, Katherine
%Y Mendelsohn, Julia
%S Proceedings of the Seventh Workshop on Natural Language Processing and Computational Social Science
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego
%@ 979-8-89176-426-2
%F sathvik-etal-2026-mapping
%X The rise of online platforms has facilitated covert forms of explicit content, which pose significant challenges for detection and regulation. Often using coded language to bypass moderation, this content erodes user trust and may be associated with scam-related risks, posing direct financial and personal risks. In this study, we map the landscape of online explicit content posts, focusing on their categorization, linguistic strategies, and temporal and behavioral patterns as they appear within mainstream platform Reddit. We investigated five distinct content categories including Virtual Services (VS), Physical Services (PS), Exhibitionism (Ex), Couples and Group Interactions (CGI), and Content Creation and Sales (CCS) and performed large-scale experimentation using state-of-the-art large language models (LLMs) such as GPT-4, LLaMA 3.3-70B-Instruct, Gemini 1.5 Flash, Mistral 8×7B, Qwen 2.5 Turbo, and Claude 3.5 Haiku. Our work demonstrates that a nuanced classification of these services requires moving beyond simple keywords, and we establish that expressive signals such as sentiment, emotion, and tone are critical features for accurate detection. Our analysis reveals the distinct behavioral and psychosocial expression patterns that characterize each service category, providing a robust framework for future moderation.
%U https://aclanthology.org/2026.nlpcss-1.16/
%P 271-292
Markdown (Informal)
[Mapping the Landscape of Unregulated eXplicit Contents on Reddit](https://aclanthology.org/2026.nlpcss-1.16/) (Sathvik et al., NLP+CSS 2026)
ACL
- Msvpj Sathvik, Manan Roy Choudhury, Rishita Agarwal, Sathwik Narkedimilli, Thao Ha, Liesel Sharabi, and Vivek Gupta. 2026. Mapping the Landscape of Unregulated eXplicit Contents on Reddit. In Proceedings of the Seventh Workshop on Natural Language Processing and Computational Social Science, pages 271–292, San Diego. Association for Computational Linguistics.