BibTeX
@inproceedings{wang-dragut-2024-overlooked,
    title = "The Overlooked Repetitive Lengthening Form in Sentiment Analysis",
    author = "Wang, Lei and
      Dragut, Eduard",
    editor = "Al-Onaizan, Yaser and
      Bansal, Mohit and
      Chen, Yun-Nung",
    booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
    month = nov,
    year = "2024",
    address = "Miami, Florida, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.findings-emnlp.952",
    pages = "16225--16238",
    abstract = "Individuals engaging in online communication frequently express personal opinions with informal styles (e.g., memes and emojis). While Language Models (LMs) with informal communications have been widely discussed, a unique and emphatic style, the Repetitive Lengthening Form (RLF), has been overlooked for years. In this paper, we explore answers to two research questions: 1) Is RLF important for SA? 2) Can LMs understand RLF? Inspired by previous linguistic research, we curate **Lengthening**, the first multi-domain dataset with 850k samples focused on RLF for sentiment analysis. Moreover, we introduce **ExpInstruct**, a two-stage Explainable Instruction Tuning framework aimed at improving both the performance and explainability of LLMs for RLF. We further propose a novel unified approach to quantify LMs{'} understanding of informal expressions. We show that RLF sentences are expressive expressions and can serve as signatures of document-level sentiment. Additionally, RLF has potential value for online content analysis. Our comprehensive results show that fine-tuned Pre-trained Language Models (PLMs) can surpass zero-shot GPT-4 in performance but not in explanation for RLF. Finally, we show ExpInstruct can improve the open-sourced LLMs to match zero-shot GPT-4 in performance and explainability for RLF with limited samples. Code and sample data are available at https://github.com/Tom-Owl/OverlookedRLF",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="wang-dragut-2024-overlooked">
    <titleInfo>
      <title>The Overlooked Repetitive Lengthening Form in Sentiment Analysis</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Lei</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Eduard</namePart>
      <namePart type="family">Dragut</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yaser</namePart>
        <namePart type="family">Al-Onaizan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mohit</namePart>
        <namePart type="family">Bansal</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yun-Nung</namePart>
        <namePart type="family">Chen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Miami, Florida, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Individuals engaging in online communication frequently express personal opinions with informal styles (e.g., memes and emojis). While Language Models (LMs) with informal communications have been widely discussed, a unique and emphatic style, the Repetitive Lengthening Form (RLF), has been overlooked for years. In this paper, we explore answers to two research questions: 1) Is RLF important for SA? 2) Can LMs understand RLF? Inspired by previous linguistic research, we curate **Lengthening**, the first multi-domain dataset with 850k samples focused on RLF for sentiment analysis. Moreover, we introduce **ExpInstruct**, a two-stage Explainable Instruction Tuning framework aimed at improving both the performance and explainability of LLMs for RLF. We further propose a novel unified approach to quantify LMs’ understanding of informal expressions. We show that RLF sentences are expressive expressions and can serve as signatures of document-level sentiment. Additionally, RLF has potential value for online content analysis. Our comprehensive results show that fine-tuned Pre-trained Language Models (PLMs) can surpass zero-shot GPT-4 in performance but not in explanation for RLF. Finally, we show ExpInstruct can improve the open-sourced LLMs to match zero-shot GPT-4 in performance and explainability for RLF with limited samples. Code and sample data are available at https://github.com/Tom-Owl/OverlookedRLF</abstract>
    <identifier type="citekey">wang-dragut-2024-overlooked</identifier>
    <location>
      <url>https://aclanthology.org/2024.findings-emnlp.952</url>
    </location>
    <part>
      <date>2024-11</date>
      <extent unit="page">
        <start>16225</start>
        <end>16238</end>
      </extent>
    </part>
  </mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T The Overlooked Repetitive Lengthening Form in Sentiment Analysis
%A Wang, Lei
%A Dragut, Eduard
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F wang-dragut-2024-overlooked
%X Individuals engaging in online communication frequently express personal opinions with informal styles (e.g., memes and emojis). While Language Models (LMs) with informal communications have been widely discussed, a unique and emphatic style, the Repetitive Lengthening Form (RLF), has been overlooked for years. In this paper, we explore answers to two research questions: 1) Is RLF important for SA? 2) Can LMs understand RLF? Inspired by previous linguistic research, we curate **Lengthening**, the first multi-domain dataset with 850k samples focused on RLF for sentiment analysis. Moreover, we introduce **ExpInstruct**, a two-stage Explainable Instruction Tuning framework aimed at improving both the performance and explainability of LLMs for RLF. We further propose a novel unified approach to quantify LMs’ understanding of informal expressions. We show that RLF sentences are expressive expressions and can serve as signatures of document-level sentiment. Additionally, RLF has potential value for online content analysis. Our comprehensive results show that fine-tuned Pre-trained Language Models (PLMs) can surpass zero-shot GPT-4 in performance but not in explanation for RLF. Finally, we show ExpInstruct can improve the open-sourced LLMs to match zero-shot GPT-4 in performance and explainability for RLF with limited samples. Code and sample data are available at https://github.com/Tom-Owl/OverlookedRLF
%U https://aclanthology.org/2024.findings-emnlp.952
%P 16225-16238
Markdown (Informal)
[The Overlooked Repetitive Lengthening Form in Sentiment Analysis](https://aclanthology.org/2024.findings-emnlp.952) (Wang & Dragut, Findings 2024)
ACL
Lei Wang and Eduard Dragut. 2024. The Overlooked Repetitive Lengthening Form in Sentiment Analysis. In Findings of the Association for Computational Linguistics: EMNLP 2024, pages 16225–16238, Miami, Florida, USA. Association for Computational Linguistics.