@inproceedings{fresen-etal-2024-reprohum,
title = "{R}epro{H}um{\#}0043: Human Evaluation Reproducing Language Model as an Annotator: Exploring Dialogue Summarization on {AMI} Dataset",
author = "Fresen, Vivian and
Wu-Urbanek, Mei-Shin and
Eger, Steffen",
editor = "Balloccu, Simone and
Belz, Anya and
Huidrom, Rudali and
Reiter, Ehud and
Sedoc, Joao and
Thomson, Craig",
booktitle = "Proceedings of the Fourth Workshop on Human Evaluation of NLP Systems (HumEval) @ LREC-COLING 2024",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.humeval-1.17",
pages = "199--209",
abstract = "This study, conducted as part of the ReproHum project, aimed to replicate and evaluate the experiment presented in {``}Language Model as an Annotator: Exploring DialoGPT for Dialogue Summarization{''} by Feng et al. (2021). By employing DialoGPT, BART, and PGN models, the study assessed dialogue summarization{'}s informativeness. Based on the ReproHum project{'}s baselines, we conducted a human evaluation for the AMI dataset, aiming to compare the results of the original study with our own experiments. Our objective is to contribute to the research on human evaluation and the reproducibility of the original study{'}s findings in the field of Natural Language Processing (NLP). Through this endeavor, we seek to enhance understanding and establish reliable benchmarks in human evaluation methodologies within the NLP domain.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fresen-etal-2024-reprohum">
<titleInfo>
<title>ReproHum#0043: Human Evaluation Reproducing Language Model as an Annotator: Exploring Dialogue Summarization on AMI Dataset</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vivian</namePart>
<namePart type="family">Fresen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mei-Shin</namePart>
<namePart type="family">Wu-Urbanek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steffen</namePart>
<namePart type="family">Eger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on Human Evaluation of NLP Systems (HumEval) @ LREC-COLING 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Simone</namePart>
<namePart type="family">Balloccu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anya</namePart>
<namePart type="family">Belz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rudali</namePart>
<namePart type="family">Huidrom</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ehud</namePart>
<namePart type="family">Reiter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joao</namePart>
<namePart type="family">Sedoc</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Craig</namePart>
<namePart type="family">Thomson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This study, conducted as part of the ReproHum project, aimed to replicate and evaluate the experiment presented in “Language Model as an Annotator: Exploring DialoGPT for Dialogue Summarization” by Feng et al. (2021). By employing DialoGPT, BART, and PGN models, the study assessed dialogue summarization’s informativeness. Based on the ReproHum project’s baselines, we conducted a human evaluation for the AMI dataset, aiming to compare the results of the original study with our own experiments. Our objective is to contribute to the research on human evaluation and the reproducibility of the original study’s findings in the field of Natural Language Processing (NLP). Through this endeavor, we seek to enhance understanding and establish reliable benchmarks in human evaluation methodologies within the NLP domain.</abstract>
<identifier type="citekey">fresen-etal-2024-reprohum</identifier>
<location>
<url>https://aclanthology.org/2024.humeval-1.17</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>199</start>
<end>209</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ReproHum#0043: Human Evaluation Reproducing Language Model as an Annotator: Exploring Dialogue Summarization on AMI Dataset
%A Fresen, Vivian
%A Wu-Urbanek, Mei-Shin
%A Eger, Steffen
%Y Balloccu, Simone
%Y Belz, Anya
%Y Huidrom, Rudali
%Y Reiter, Ehud
%Y Sedoc, Joao
%Y Thomson, Craig
%S Proceedings of the Fourth Workshop on Human Evaluation of NLP Systems (HumEval) @ LREC-COLING 2024
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F fresen-etal-2024-reprohum
%X This study, conducted as part of the ReproHum project, aimed to replicate and evaluate the experiment presented in “Language Model as an Annotator: Exploring DialoGPT for Dialogue Summarization” by Feng et al. (2021). By employing DialoGPT, BART, and PGN models, the study assessed dialogue summarization’s informativeness. Based on the ReproHum project’s baselines, we conducted a human evaluation for the AMI dataset, aiming to compare the results of the original study with our own experiments. Our objective is to contribute to the research on human evaluation and the reproducibility of the original study’s findings in the field of Natural Language Processing (NLP). Through this endeavor, we seek to enhance understanding and establish reliable benchmarks in human evaluation methodologies within the NLP domain.
%U https://aclanthology.org/2024.humeval-1.17
%P 199-209
Markdown (Informal)
[ReproHum#0043: Human Evaluation Reproducing Language Model as an Annotator: Exploring Dialogue Summarization on AMI Dataset](https://aclanthology.org/2024.humeval-1.17) (Fresen et al., HumEval-WS 2024)
ACL