@inproceedings{baris-schlicht-etal-2024-pitfalls,
title = "Pitfalls of Conversational {LLM}s on News Debiasing",
author = "Baris Schlicht, Ipek and
Altiok, Defne and
Taouk, Maryanne and
Flek, Lucie",
editor = "Hautli-Janisz, Annette and
Lapesa, Gabriella and
Anastasiou, Lucas and
Gold, Valentin and
De Liddo, Anna and
Reed, Chris",
booktitle = "Proceedings of the First Workshop on Language-driven Deliberation Technology (DELITE) @ LREC-COLING 2024",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.delite-1.4",
pages = "33--38",
abstract = "This paper addresses debiasing in news editing and evaluates the effectiveness of conversational Large Language Models in this task. We designed an evaluation checklist tailored to news editors{'} perspectives, obtained generated texts from three popular conversational models using a subset of a publicly available dataset in media bias, and evaluated the texts according to the designed checklist. Furthermore, we examined the models as evaluator for checking the quality of debiased model outputs. Our findings indicate that none of the LLMs are perfect in debiasing. Notably, some models, including ChatGPT, introduced unnecessary changes that may impact the author{'}s style and create misinformation. Lastly, we show that the models do not perform as proficiently as domain experts in evaluating the quality of debiased outputs.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="baris-schlicht-etal-2024-pitfalls">
<titleInfo>
<title>Pitfalls of Conversational LLMs on News Debiasing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ipek</namePart>
<namePart type="family">Baris Schlicht</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Defne</namePart>
<namePart type="family">Altiok</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maryanne</namePart>
<namePart type="family">Taouk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucie</namePart>
<namePart type="family">Flek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Language-driven Deliberation Technology (DELITE) @ LREC-COLING 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Annette</namePart>
<namePart type="family">Hautli-Janisz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gabriella</namePart>
<namePart type="family">Lapesa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucas</namePart>
<namePart type="family">Anastasiou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Valentin</namePart>
<namePart type="family">Gold</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">De Liddo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Reed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper addresses debiasing in news editing and evaluates the effectiveness of conversational Large Language Models in this task. We designed an evaluation checklist tailored to news editors’ perspectives, obtained generated texts from three popular conversational models using a subset of a publicly available dataset in media bias, and evaluated the texts according to the designed checklist. Furthermore, we examined the models as evaluator for checking the quality of debiased model outputs. Our findings indicate that none of the LLMs are perfect in debiasing. Notably, some models, including ChatGPT, introduced unnecessary changes that may impact the author’s style and create misinformation. Lastly, we show that the models do not perform as proficiently as domain experts in evaluating the quality of debiased outputs.</abstract>
<identifier type="citekey">baris-schlicht-etal-2024-pitfalls</identifier>
<location>
<url>https://aclanthology.org/2024.delite-1.4</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>33</start>
<end>38</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Pitfalls of Conversational LLMs on News Debiasing
%A Baris Schlicht, Ipek
%A Altiok, Defne
%A Taouk, Maryanne
%A Flek, Lucie
%Y Hautli-Janisz, Annette
%Y Lapesa, Gabriella
%Y Anastasiou, Lucas
%Y Gold, Valentin
%Y De Liddo, Anna
%Y Reed, Chris
%S Proceedings of the First Workshop on Language-driven Deliberation Technology (DELITE) @ LREC-COLING 2024
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F baris-schlicht-etal-2024-pitfalls
%X This paper addresses debiasing in news editing and evaluates the effectiveness of conversational Large Language Models in this task. We designed an evaluation checklist tailored to news editors’ perspectives, obtained generated texts from three popular conversational models using a subset of a publicly available dataset in media bias, and evaluated the texts according to the designed checklist. Furthermore, we examined the models as evaluator for checking the quality of debiased model outputs. Our findings indicate that none of the LLMs are perfect in debiasing. Notably, some models, including ChatGPT, introduced unnecessary changes that may impact the author’s style and create misinformation. Lastly, we show that the models do not perform as proficiently as domain experts in evaluating the quality of debiased outputs.
%U https://aclanthology.org/2024.delite-1.4
%P 33-38
Markdown (Informal)
[Pitfalls of Conversational LLMs on News Debiasing](https://aclanthology.org/2024.delite-1.4) (Baris Schlicht et al., DELITE 2024)
ACL
- Ipek Baris Schlicht, Defne Altiok, Maryanne Taouk, and Lucie Flek. 2024. Pitfalls of Conversational LLMs on News Debiasing. In Proceedings of the First Workshop on Language-driven Deliberation Technology (DELITE) @ LREC-COLING 2024, pages 33–38, Torino, Italia. ELRA and ICCL.