@inproceedings{chu-etal-2026-bigtokdetect,
title = "{B}ig{T}ok{D}etect: A Clinically-Informed Vision{--}Language Modeling Framework for Detecting Pro-Bigorexia Videos on {T}ik{T}ok",
author = "Chu, Minh Duc and
Pawar, Kshitij and
He, Zihao and
Sharifi, Roxanna and
Sonnenblick, Ross M. and
Curry, Magdalayna and
DAdamo, Laura and
Young, Lindsay and
Murray, Stuart and
Lerman, Kristina",
editor = "Demberg, Vera and
Inui, Kentaro and
Marquez, Llu{\'i}s",
booktitle = "Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.eacl-long.33/",
pages = "766--790",
ISBN = "979-8-89176-380-7",
abstract = "Social media platforms face escalating challenges in detecting harmful content that promotes muscle dysmorphic behaviors and cognitions (bigorexia). This content can evade moderation by camouflaging as legitimate fitness advice and disproportionately affects adolescent males. We address this challenge with BigTokDetect, a clinically informed framework for identifying pro-bigorexia content on TikTok. We introduce BigTok, the first expert-annotated multimodal benchmark dataset of over 2,200 TikTok videos labeled by clinical psychiatrists across five categories and eighteen fine-grained subcategories. Comprehensive evaluation of state-of-the-art vision-language models reveals that while commercial zero-shot models achieve the highest accuracy on broad primary categories, supervised fine-tuning enables smaller open-source models to perform better on fine-grained subcategory detection. Ablation studies show that multimodal fusion improves performance by 5 to 15 percent, with video features providing the most discriminative signals. These findings support a grounded moderation approach that automates detection of explicit harms while flagging ambiguous content for human review, and they establish a scalable framework for harm mitigation in emerging mental health domains."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chu-etal-2026-bigtokdetect">
<titleInfo>
<title>BigTokDetect: A Clinically-Informed Vision–Language Modeling Framework for Detecting Pro-Bigorexia Videos on TikTok</title>
</titleInfo>
<name type="personal">
<namePart type="given">Minh</namePart>
<namePart type="given">Duc</namePart>
<namePart type="family">Chu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kshitij</namePart>
<namePart type="family">Pawar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zihao</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roxanna</namePart>
<namePart type="family">Sharifi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ross</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Sonnenblick</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Magdalayna</namePart>
<namePart type="family">Curry</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laura</namePart>
<namePart type="family">DAdamo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lindsay</namePart>
<namePart type="family">Young</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stuart</namePart>
<namePart type="family">Murray</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kristina</namePart>
<namePart type="family">Lerman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Demberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lluís</namePart>
<namePart type="family">Marquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-380-7</identifier>
</relatedItem>
<abstract>Social media platforms face escalating challenges in detecting harmful content that promotes muscle dysmorphic behaviors and cognitions (bigorexia). This content can evade moderation by camouflaging as legitimate fitness advice and disproportionately affects adolescent males. We address this challenge with BigTokDetect, a clinically informed framework for identifying pro-bigorexia content on TikTok. We introduce BigTok, the first expert-annotated multimodal benchmark dataset of over 2,200 TikTok videos labeled by clinical psychiatrists across five categories and eighteen fine-grained subcategories. Comprehensive evaluation of state-of-the-art vision-language models reveals that while commercial zero-shot models achieve the highest accuracy on broad primary categories, supervised fine-tuning enables smaller open-source models to perform better on fine-grained subcategory detection. Ablation studies show that multimodal fusion improves performance by 5 to 15 percent, with video features providing the most discriminative signals. These findings support a grounded moderation approach that automates detection of explicit harms while flagging ambiguous content for human review, and they establish a scalable framework for harm mitigation in emerging mental health domains.</abstract>
<identifier type="citekey">chu-etal-2026-bigtokdetect</identifier>
<location>
<url>https://aclanthology.org/2026.eacl-long.33/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>766</start>
<end>790</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BigTokDetect: A Clinically-Informed Vision–Language Modeling Framework for Detecting Pro-Bigorexia Videos on TikTok
%A Chu, Minh Duc
%A Pawar, Kshitij
%A He, Zihao
%A Sharifi, Roxanna
%A Sonnenblick, Ross M.
%A Curry, Magdalayna
%A DAdamo, Laura
%A Young, Lindsay
%A Murray, Stuart
%A Lerman, Kristina
%Y Demberg, Vera
%Y Inui, Kentaro
%Y Marquez, Lluís
%S Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-380-7
%F chu-etal-2026-bigtokdetect
%X Social media platforms face escalating challenges in detecting harmful content that promotes muscle dysmorphic behaviors and cognitions (bigorexia). This content can evade moderation by camouflaging as legitimate fitness advice and disproportionately affects adolescent males. We address this challenge with BigTokDetect, a clinically informed framework for identifying pro-bigorexia content on TikTok. We introduce BigTok, the first expert-annotated multimodal benchmark dataset of over 2,200 TikTok videos labeled by clinical psychiatrists across five categories and eighteen fine-grained subcategories. Comprehensive evaluation of state-of-the-art vision-language models reveals that while commercial zero-shot models achieve the highest accuracy on broad primary categories, supervised fine-tuning enables smaller open-source models to perform better on fine-grained subcategory detection. Ablation studies show that multimodal fusion improves performance by 5 to 15 percent, with video features providing the most discriminative signals. These findings support a grounded moderation approach that automates detection of explicit harms while flagging ambiguous content for human review, and they establish a scalable framework for harm mitigation in emerging mental health domains.
%U https://aclanthology.org/2026.eacl-long.33/
%P 766-790
Markdown (Informal)
[BigTokDetect: A Clinically-Informed Vision–Language Modeling Framework for Detecting Pro-Bigorexia Videos on TikTok](https://aclanthology.org/2026.eacl-long.33/) (Chu et al., EACL 2026)
ACL
- Minh Duc Chu, Kshitij Pawar, Zihao He, Roxanna Sharifi, Ross M. Sonnenblick, Magdalayna Curry, Laura DAdamo, Lindsay Young, Stuart Murray, and Kristina Lerman. 2026. BigTokDetect: A Clinically-Informed Vision–Language Modeling Framework for Detecting Pro-Bigorexia Videos on TikTok. In Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers), pages 766–790, Rabat, Morocco. Association for Computational Linguistics.