Conference paper in the ACL 2025 main proceedings (Volume 1: Long Papers).
The address field holds the conference location as given in the source record.
@inproceedings{bagdon-etal-2025-donate,
  title     = {Donate or Create? Comparing Data Collection Strategies for Emotion-labeled Multimodal Social Media Posts},
  author    = {Bagdon, Christopher and
               Combs, Aidan and
               Silberer, Carina and
               Klinger, Roman},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.acl-long.847/},
  doi       = {10.18653/v1/2025.acl-long.847},
  pages     = {17307--17330},
  isbn      = {979-8-89176-251-0},
  abstract  = {Accurate modeling of subjective phenomena such as emotion expression requires data annotated with authors' intentions. Commonly such data is collected by asking study participants to donate and label genuine content produced in the real world, or create content fitting particular labels during the study. Asking participants to create content is often simpler to implement and presents fewer risks to participant privacy than data donation. However, it is unclear if and how study-created content may differ from genuine content, and how differences may impact models. We collect study-created and genuine multimodal social media posts labeled for emotion and compare them on several dimensions, including model performance. We find that compared to genuine posts, study-created posts are longer, rely more on their text and less on their images for emotion expression, and focus more on emotion-prototypical events. The samples of participants willing to donate versus create posts are demographically different. Study-created data is valuable to train models that generalize well to genuine data, but realistic effectiveness estimates require genuine data.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bagdon-etal-2025-donate">
<titleInfo>
<title>Donate or Create? Comparing Data Collection Strategies for Emotion-labeled Multimodal Social Media Posts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Bagdon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aidan</namePart>
<namePart type="family">Combs</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carina</namePart>
<namePart type="family">Silberer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roman</namePart>
<namePart type="family">Klinger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-251-0</identifier>
</relatedItem>
<abstract>Accurate modeling of subjective phenomena such as emotion expression requires data annotated with authors’ intentions. Commonly such data is collected by asking study participants to donate and label genuine content produced in the real world, or create content fitting particular labels during the study. Asking participants to create content is often simpler to implement and presents fewer risks to participant privacy than data donation. However, it is unclear if and how study-created content may differ from genuine content, and how differences may impact models. We collect study-created and genuine multimodal social media posts labeled for emotion and compare them on several dimensions, including model performance. We find that compared to genuine posts, study-created posts are longer, rely more on their text and less on their images for emotion expression, and focus more on emotion-prototypical events. The samples of participants willing to donate versus create posts are demographically different. Study-created data is valuable to train models that generalize well to genuine data, but realistic effectiveness estimates require genuine data.</abstract>
<identifier type="citekey">bagdon-etal-2025-donate</identifier>
<identifier type="doi">10.18653/v1/2025.acl-long.847</identifier>
<location>
<url>https://aclanthology.org/2025.acl-long.847/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>17307</start>
<end>17330</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Donate or Create? Comparing Data Collection Strategies for Emotion-labeled Multimodal Social Media Posts
%A Bagdon, Christopher
%A Combs, Aidan
%A Silberer, Carina
%A Klinger, Roman
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F bagdon-etal-2025-donate
%X Accurate modeling of subjective phenomena such as emotion expression requires data annotated with authors’ intentions. Commonly such data is collected by asking study participants to donate and label genuine content produced in the real world, or create content fitting particular labels during the study. Asking participants to create content is often simpler to implement and presents fewer risks to participant privacy than data donation. However, it is unclear if and how study-created content may differ from genuine content, and how differences may impact models. We collect study-created and genuine multimodal social media posts labeled for emotion and compare them on several dimensions, including model performance. We find that compared to genuine posts, study-created posts are longer, rely more on their text and less on their images for emotion expression, and focus more on emotion-prototypical events. The samples of participants willing to donate versus create posts are demographically different. Study-created data is valuable to train models that generalize well to genuine data, but realistic effectiveness estimates require genuine data.
%R 10.18653/v1/2025.acl-long.847
%U https://aclanthology.org/2025.acl-long.847/
%U https://doi.org/10.18653/v1/2025.acl-long.847
%P 17307-17330
Markdown (Informal)
[Donate or Create? Comparing Data Collection Strategies for Emotion-labeled Multimodal Social Media Posts](https://aclanthology.org/2025.acl-long.847/) (Bagdon et al., ACL 2025)
ACL