@inproceedings{abdelgaber-etal-2025-bridging,
title = "Bridging the Socioeconomic Gap in Education: A Hybrid {AI} and Human Annotation Approach",
author = "Abdelgaber, Nahed and
Jahan, Labiba and
Doshi, Arham Vinit and
Suri, Rishi and
Pavel, Hamza Reza and
Zhang, Jia",
editor = "Boleda, Gemma and
Roth, Michael",
booktitle = "Proceedings of the 29th Conference on Computational Natural Language Learning",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.conll-1.23/",
doi = "10.18653/v1/2025.conll-1.23",
pages = "348--364",
ISBN = "979-8-89176-271-8",
abstract = "Students' academic performance is influenced by various demographic factors, with socioeconomic class being a prominently researched and debated factor. Computer Science research traditionally prioritizes computationally definable problems, yet challenges such as the scarcity of high-quality labeled data and ethical concerns surrounding the mining of personal information can pose barriers to exploring topics like the impact of SES on students' education. Overcoming these barriers may involve automating the collection and annotation of high-quality language data from diverse social groups through human collaboration. Therefore, our focus is on gathering unstructured narratives from Internet forums written by students with low socioeconomic status (SES) using machine learning models and human insights. We developed a hybrid data collection model that semi-automatically retrieved narratives from the Reddit website and created a dataset five times larger than the seed dataset. Additionally, we compared the performance of traditional ML models with recent large language models (LLMs) in classifying narratives written by low-SES students, and analyzed the collected data to extract valuable insights into the socioeconomic challenges these students encounter and the solutions they pursue."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="abdelgaber-etal-2025-bridging">
<titleInfo>
<title>Bridging the Socioeconomic Gap in Education: A Hybrid AI and Human Annotation Approach</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nahed</namePart>
<namePart type="family">Abdelgaber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Labiba</namePart>
<namePart type="family">Jahan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arham</namePart>
<namePart type="given">Vinit</namePart>
<namePart type="family">Doshi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rishi</namePart>
<namePart type="family">Suri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hamza</namePart>
<namePart type="given">Reza</namePart>
<namePart type="family">Pavel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jia</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 29th Conference on Computational Natural Language Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gemma</namePart>
<namePart type="family">Boleda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Roth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-271-8</identifier>
</relatedItem>
<abstract>Students’ academic performance is influenced by various demographic factors, with socioeconomic class being a prominently researched and debated factor. Computer Science research traditionally prioritizes computationally definable problems, yet challenges such as the scarcity of high-quality labeled data and ethical concerns surrounding the mining of personal information can pose barriers to exploring topics like the impact of SES on students’ education. Overcoming these barriers may involve automating the collection and annotation of high-quality language data from diverse social groups through human collaboration. Therefore, our focus is on gathering unstructured narratives from Internet forums written by students with low socioeconomic status (SES) using machine learning models and human insights. We developed a hybrid data collection model that semi-automatically retrieved narratives from the Reddit website and created a dataset five times larger than the seed dataset. Additionally, we compared the performance of traditional ML models with recent large language models (LLMs) in classifying narratives written by low-SES students, and analyzed the collected data to extract valuable insights into the socioeconomic challenges these students encounter and the solutions they pursue.</abstract>
<identifier type="citekey">abdelgaber-etal-2025-bridging</identifier>
<identifier type="doi">10.18653/v1/2025.conll-1.23</identifier>
<location>
<url>https://aclanthology.org/2025.conll-1.23/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>348</start>
<end>364</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Bridging the Socioeconomic Gap in Education: A Hybrid AI and Human Annotation Approach
%A Abdelgaber, Nahed
%A Jahan, Labiba
%A Doshi, Arham Vinit
%A Suri, Rishi
%A Pavel, Hamza Reza
%A Zhang, Jia
%Y Boleda, Gemma
%Y Roth, Michael
%S Proceedings of the 29th Conference on Computational Natural Language Learning
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-271-8
%F abdelgaber-etal-2025-bridging
%X Students’ academic performance is influenced by various demographic factors, with socioeconomic class being a prominently researched and debated factor. Computer Science research traditionally prioritizes computationally definable problems, yet challenges such as the scarcity of high-quality labeled data and ethical concerns surrounding the mining of personal information can pose barriers to exploring topics like the impact of SES on students’ education. Overcoming these barriers may involve automating the collection and annotation of high-quality language data from diverse social groups through human collaboration. Therefore, our focus is on gathering unstructured narratives from Internet forums written by students with low socioeconomic status (SES) using machine learning models and human insights. We developed a hybrid data collection model that semi-automatically retrieved narratives from the Reddit website and created a dataset five times larger than the seed dataset. Additionally, we compared the performance of traditional ML models with recent large language models (LLMs) in classifying narratives written by low-SES students, and analyzed the collected data to extract valuable insights into the socioeconomic challenges these students encounter and the solutions they pursue.
%R 10.18653/v1/2025.conll-1.23
%U https://aclanthology.org/2025.conll-1.23/
%U https://doi.org/10.18653/v1/2025.conll-1.23
%P 348-364
Markdown (Informal)
[Bridging the Socioeconomic Gap in Education: A Hybrid AI and Human Annotation Approach](https://aclanthology.org/2025.conll-1.23/) (Abdelgaber et al., CoNLL 2025)
ACL