@inproceedings{nagaraj-rao-etal-2025-quallm,
title = "{Q}ua{LLM}: An {LLM}-based Framework to Extract Quantitative Insights from Online Forums",
author = "Nagaraj Rao, Varun and
Agarwal, Eesha and
Dalal, Samantha and
Calacci, Dana and
Monroy-Hern{\'a}ndez, Andr{\'e}s",
editor = "Chiruzzo, Luis and
Ritter, Alan and
Wang, Lu",
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2025",
month = apr,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-naacl.74/",
doi = "10.18653/v1/2025.findings-naacl.74",
pages = "1355--1369",
ISBN = "979-8-89176-195-7",
abstract = "Online discussion forums provide crucial data to understand the concerns of a wide range of real-world communities. However, the typical qualitative and quantitative methodologies used to analyze those data, such as thematic analysis and topic modeling, are infeasible to scale or require significant human effort to translate outputs to human readable forms. This study introduces QuaLLM, a novel LLM-based framework to analyze and extract quantitative insights from text data on online forums. The framework consists of a novel prompting and human evaluation methodology. We applied this framework to analyze over one million comments from two of Reddit{'}s rideshare worker communities, marking the largest study of its type. We uncover significant worker concerns regarding AI and algorithmic platform decisions, responding to regulatory calls about worker insights. In short, our work sets a new precedent for AI-assisted quantitative data analysis to surface concerns from online forums."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nagaraj-rao-etal-2025-quallm">
<titleInfo>
<title>QuaLLM: An LLM-based Framework to Extract Quantitative Insights from Online Forums</title>
</titleInfo>
<name type="personal">
<namePart type="given">Varun</namePart>
<namePart type="family">Nagaraj Rao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eesha</namePart>
<namePart type="family">Agarwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samantha</namePart>
<namePart type="family">Dalal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dana</namePart>
<namePart type="family">Calacci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrés</namePart>
<namePart type="family">Monroy-Hernández</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: NAACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Luis</namePart>
<namePart type="family">Chiruzzo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Ritter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lu</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-195-7</identifier>
</relatedItem>
<abstract>Online discussion forums provide crucial data to understand the concerns of a wide range of real-world communities. However, the typical qualitative and quantitative methodologies used to analyze those data, such as thematic analysis and topic modeling, are infeasible to scale or require significant human effort to translate outputs to human readable forms. This study introduces QuaLLM, a novel LLM-based framework to analyze and extract quantitative insights from text data on online forums. The framework consists of a novel prompting and human evaluation methodology. We applied this framework to analyze over one million comments from two of Reddit’s rideshare worker communities, marking the largest study of its type. We uncover significant worker concerns regarding AI and algorithmic platform decisions, responding to regulatory calls about worker insights. In short, our work sets a new precedent for AI-assisted quantitative data analysis to surface concerns from online forums.</abstract>
<identifier type="citekey">nagaraj-rao-etal-2025-quallm</identifier>
<identifier type="doi">10.18653/v1/2025.findings-naacl.74</identifier>
<location>
<url>https://aclanthology.org/2025.findings-naacl.74/</url>
</location>
<part>
<date>2025-04</date>
<extent unit="page">
<start>1355</start>
<end>1369</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T QuaLLM: An LLM-based Framework to Extract Quantitative Insights from Online Forums
%A Nagaraj Rao, Varun
%A Agarwal, Eesha
%A Dalal, Samantha
%A Calacci, Dana
%A Monroy-Hernández, Andrés
%Y Chiruzzo, Luis
%Y Ritter, Alan
%Y Wang, Lu
%S Findings of the Association for Computational Linguistics: NAACL 2025
%D 2025
%8 April
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-195-7
%F nagaraj-rao-etal-2025-quallm
%X Online discussion forums provide crucial data to understand the concerns of a wide range of real-world communities. However, the typical qualitative and quantitative methodologies used to analyze those data, such as thematic analysis and topic modeling, are infeasible to scale or require significant human effort to translate outputs to human readable forms. This study introduces QuaLLM, a novel LLM-based framework to analyze and extract quantitative insights from text data on online forums. The framework consists of a novel prompting and human evaluation methodology. We applied this framework to analyze over one million comments from two of Reddit’s rideshare worker communities, marking the largest study of its type. We uncover significant worker concerns regarding AI and algorithmic platform decisions, responding to regulatory calls about worker insights. In short, our work sets a new precedent for AI-assisted quantitative data analysis to surface concerns from online forums.
%R 10.18653/v1/2025.findings-naacl.74
%U https://aclanthology.org/2025.findings-naacl.74/
%U https://doi.org/10.18653/v1/2025.findings-naacl.74
%P 1355-1369
Markdown (Informal)
[QuaLLM: An LLM-based Framework to Extract Quantitative Insights from Online Forums](https://aclanthology.org/2025.findings-naacl.74/) (Nagaraj Rao et al., Findings 2025)
ACL