% Conference paper record (COLING 2025, pages 6169--6183).
% Names use the "Last, First" form; the editor surname "Di Eugenio" is two
% words and must stay together before the comma so BibTeX does not treat
% "Di" as part of the given name.
@inproceedings{zhang-etal-2025-lass,
  title     = {{LASS}: A Novel and Economical Data Augmentation Framework Based on Language Models for Debiasing Opinion Summarization},
  author    = {Zhang, Yanyue and
               Li, Pengfei and
               Lai, Yilong and
               He, Yulan and
               Zhou, Deyu},
  editor    = {Rambow, Owen and
               Wanner, Leo and
               Apidianaki, Marianna and
               Al-Khalifa, Hend and
               Di Eugenio, Barbara and
               Schockaert, Steven},
  booktitle = {Proceedings of the 31st International Conference on Computational Linguistics},
  month     = jan,
  year      = {2025},
  address   = {Abu Dhabi, UAE},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.coling-main.412/},
  pages     = {6169--6183},
  abstract  = {As more than 70{\%} of reviews in the existing opinion summary data set are positive, current opinion summarization approaches are hesitant to generate negative summaries given the input of negative texts. To address such sentiment bias, a direct approach without the reliance on a specific structure is to generate additional data based on large language models to balance the emotional distribution of the dataset. However, large-scale data augmentation based on large language models faces an apparent disadvantage, the expensive costs. Therefore, in this paper, we propose LASS, a novel data augmentation framework based on both \textbf{LA}rge and \textbf{S}mall language models for debia\textbf{S}ing opinion summarization. Specifically, a small number of synthesized negative reviews is obtained by rewriting the positive text via a large language model. Then, a disentangle reconstruction model is trained based on the generated data. After training, a large amount of synthetic data can be obtained by decoding the new representation obtained from the combination of different sample representations and filtering based on perplexity degree and sentiment classification. Experiments have proved that LASS can effectively alleviate emotional bias, similar to using only large models, but in a more economical way.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for citekey zhang-etal-2025-lass. Authors are listed on the
     item itself; editors, publisher and place belong to the host proceedings
     (relatedItem type="host"). The editor surname "Di Eugenio" is kept whole
     in a single family namePart. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="zhang-etal-2025-lass">
    <titleInfo>
      <title>LASS: A Novel and Economical Data Augmentation Framework Based on Language Models for Debiasing Opinion Summarization</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Yanyue</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pengfei</namePart>
      <namePart type="family">Li</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yilong</namePart>
      <namePart type="family">Lai</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yulan</namePart>
      <namePart type="family">He</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Deyu</namePart>
      <namePart type="family">Zhou</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-01</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 31st International Conference on Computational Linguistics</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Owen</namePart>
        <namePart type="family">Rambow</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Leo</namePart>
        <namePart type="family">Wanner</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marianna</namePart>
        <namePart type="family">Apidianaki</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hend</namePart>
        <namePart type="family">Al-Khalifa</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <!-- "Di" belongs to the surname, not the given name. -->
        <namePart type="given">Barbara</namePart>
        <namePart type="family">Di Eugenio</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Steven</namePart>
        <namePart type="family">Schockaert</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Abu Dhabi, UAE</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>As more than 70% of reviews in the existing opinion summary data set are positive, current opinion summarization approaches are hesitant to generate negative summaries given the input of negative texts. To address such sentiment bias, a direct approach without the reliance on a specific structure is to generate additional data based on large language models to balance the emotional distribution of the dataset. However, large-scale data augmentation based on large language models faces an apparent disadvantage, the expensive costs. Therefore, in this paper, we propose LASS, a novel data augmentation framework based on both LArge and Small language models for debiaSing opinion summarization. Specifically, a small number of synthesized negative reviews is obtained by rewriting the positive text via a large language model. Then, a disentangle reconstruction model is trained based on the generated data. After training, a large amount of synthetic data can be obtained by decoding the new representation obtained from the combination of different sample representations and filtering based on perplexity degree and sentiment classification. Experiments have proved that LASS can effectively alleviate emotional bias, similar to using only large models, but in a more economical way.</abstract>
    <identifier type="citekey">zhang-etal-2025-lass</identifier>
    <location>
      <url>https://aclanthology.org/2025.coling-main.412/</url>
    </location>
    <part>
      <date>2025-01</date>
      <extent unit="page">
        <start>6169</start>
        <end>6183</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T LASS: A Novel and Economical Data Augmentation Framework Based on Language Models for Debiasing Opinion Summarization
%A Zhang, Yanyue
%A Li, Pengfei
%A Lai, Yilong
%A He, Yulan
%A Zhou, Deyu
%Y Rambow, Owen
%Y Wanner, Leo
%Y Apidianaki, Marianna
%Y Al-Khalifa, Hend
%Y Di Eugenio, Barbara
%Y Schockaert, Steven
%S Proceedings of the 31st International Conference on Computational Linguistics
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F zhang-etal-2025-lass
%X As more than 70% of reviews in the existing opinion summary data set are positive, current opinion summarization approaches are hesitant to generate negative summaries given the input of negative texts. To address such sentiment bias, a direct approach without the reliance on a specific structure is to generate additional data based on large language models to balance the emotional distribution of the dataset. However, large-scale data augmentation based on large language models faces an apparent disadvantage, the expensive costs. Therefore, in this paper, we propose LASS, a novel data augmentation framework based on both LArge and Small language models for debiaSing opinion summarization. Specifically, a small number of synthesized negative reviews is obtained by rewriting the positive text via a large language model. Then, a disentangle reconstruction model is trained based on the generated data. After training, a large amount of synthetic data can be obtained by decoding the new representation obtained from the combination of different sample representations and filtering based on perplexity degree and sentiment classification. Experiments have proved that LASS can effectively alleviate emotional bias, similar to using only large models, but in a more economical way.
%U https://aclanthology.org/2025.coling-main.412/
%P 6169-6183
Markdown (Informal)
[LASS: A Novel and Economical Data Augmentation Framework Based on Language Models for Debiasing Opinion Summarization](https://aclanthology.org/2025.coling-main.412/) (Zhang et al., COLING 2025)
ACL