@inproceedings{mahlaza-etal-2024-reprohum,
title = "{R}epro{H}um {\#}0866-04: Another Evaluation of Readers{'} Reactions to News Headlines",
author = "Mahlaza, Zola and
Raboanary, Toky Hajatiana and
Seakgwa, Kyle and
Keet, C. Maria",
editor = "Balloccu, Simone and
Belz, Anya and
Huidrom, Rudali and
Reiter, Ehud and
Sedoc, Joao and
Thomson, Craig",
booktitle = "Proceedings of the Fourth Workshop on Human Evaluation of NLP Systems (HumEval) @ LREC-COLING 2024",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.humeval-1.26",
pages = "274--280",
abstract = "The reproduction of Natural Language Processing (NLP) studies is important in establishing their reliability. Nonetheless, many papers in NLP have never been reproduced. This paper presents a reproduction of Gabriel et al. (2022){'}s work to establish the extent to which their findings, pertaining to the utility of large language models (T5 and GPT2) to automatically generate writer{'}s intents when given headlines to curb misinformation, can be confirmed. Our results show no evidence to support two of their four findings and they partially support the rest of the original findings. Specifically, while we confirmed that all the models are judged to be capable of influencing readers{'} trust or distrust, there was a difference in T5{'}s capability to reduce trust. Our results show that its generations are more likely to have greater influence in reducing trust while Gabriel et al. (2022) found more cases where they had no impact at all. In addition, most of the model generations are considered socially acceptable only if we relax the criteria for determining a majority to mean more than chance rather than the apparent {\textgreater} 70{\%} of the original study. Overall, while they found that {``}machine-generated MRF implications alongside news headlines to readers can increase their trust in real news while decreasing their trust in misinformation{''}, we found that they are more likely to decrease trust in both cases vs. having no impact at all.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mahlaza-etal-2024-reprohum">
<titleInfo>
<title>ReproHum #0866-04: Another Evaluation of Readers’ Reactions to News Headlines</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zola</namePart>
<namePart type="family">Mahlaza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Toky</namePart>
<namePart type="given">Hajatiana</namePart>
<namePart type="family">Raboanary</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kyle</namePart>
<namePart type="family">Seakgwa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">C</namePart>
<namePart type="given">Maria</namePart>
<namePart type="family">Keet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on Human Evaluation of NLP Systems (HumEval) @ LREC-COLING 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Simone</namePart>
<namePart type="family">Balloccu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anya</namePart>
<namePart type="family">Belz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rudali</namePart>
<namePart type="family">Huidrom</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ehud</namePart>
<namePart type="family">Reiter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joao</namePart>
<namePart type="family">Sedoc</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Craig</namePart>
<namePart type="family">Thomson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The reproduction of Natural Language Processing (NLP) studies is important in establishing their reliability. Nonetheless, many papers in NLP have never been reproduced. This paper presents a reproduction of Gabriel et al. (2022)’s work to establish the extent to which their findings, pertaining to the utility of large language models (T5 and GPT2) to automatically generate writer’s intents when given headlines to curb misinformation, can be confirmed. Our results show no evidence to support two of their four findings and they partially support the rest of the original findings. Specifically, while we confirmed that all the models are judged to be capable of influencing readers’ trust or distrust, there was a difference in T5’s capability to reduce trust. Our results show that its generations are more likely to have greater influence in reducing trust while Gabriel et al. (2022) found more cases where they had no impact at all. In addition, most of the model generations are considered socially acceptable only if we relax the criteria for determining a majority to mean more than chance rather than the apparent \textgreater 70% of the original study. Overall, while they found that “machine-generated MRF implications alongside news headlines to readers can increase their trust in real news while decreasing their trust in misinformation”, we found that they are more likely to decrease trust in both cases vs. having no impact at all.</abstract>
<identifier type="citekey">mahlaza-etal-2024-reprohum</identifier>
<location>
<url>https://aclanthology.org/2024.humeval-1.26</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>274</start>
<end>280</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ReproHum #0866-04: Another Evaluation of Readers’ Reactions to News Headlines
%A Mahlaza, Zola
%A Raboanary, Toky Hajatiana
%A Seakgwa, Kyle
%A Keet, C. Maria
%Y Balloccu, Simone
%Y Belz, Anya
%Y Huidrom, Rudali
%Y Reiter, Ehud
%Y Sedoc, Joao
%Y Thomson, Craig
%S Proceedings of the Fourth Workshop on Human Evaluation of NLP Systems (HumEval) @ LREC-COLING 2024
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F mahlaza-etal-2024-reprohum
%X The reproduction of Natural Language Processing (NLP) studies is important in establishing their reliability. Nonetheless, many papers in NLP have never been reproduced. This paper presents a reproduction of Gabriel et al. (2022)’s work to establish the extent to which their findings, pertaining to the utility of large language models (T5 and GPT2) to automatically generate writer’s intents when given headlines to curb misinformation, can be confirmed. Our results show no evidence to support two of their four findings and they partially support the rest of the original findings. Specifically, while we confirmed that all the models are judged to be capable of influencing readers’ trust or distrust, there was a difference in T5’s capability to reduce trust. Our results show that its generations are more likely to have greater influence in reducing trust while Gabriel et al. (2022) found more cases where they had no impact at all. In addition, most of the model generations are considered socially acceptable only if we relax the criteria for determining a majority to mean more than chance rather than the apparent \textgreater 70% of the original study. Overall, while they found that “machine-generated MRF implications alongside news headlines to readers can increase their trust in real news while decreasing their trust in misinformation”, we found that they are more likely to decrease trust in both cases vs. having no impact at all.
%U https://aclanthology.org/2024.humeval-1.26
%P 274-280
Markdown (Informal)
[ReproHum #0866-04: Another Evaluation of Readers’ Reactions to News Headlines](https://aclanthology.org/2024.humeval-1.26) (Mahlaza et al., HumEval-WS 2024)
ACL