BibTeX
@inproceedings{calo-etal-2025-lessons,
title = "Lessons from a User Experience Evaluation of {NLP} Interfaces",
author = "Cal{\`o}, Eduardo and
Penkert, Lydia and
Mahamood, Saad",
editor = "Chiruzzo, Luis and
Ritter, Alan and
Wang, Lu",
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2025",
month = apr,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-naacl.159/",
doi = "10.18653/v1/2025.findings-naacl.159",
pages = "2915--2929",
ISBN = "979-8-89176-195-7",
    abstract = "Human evaluations lie at the heart of evaluation within the field of Natural Language Processing (NLP). Although seen as the ``gold standard'' of evaluation, questions have been raised about whether these evaluations are both reproducible and repeatable. One overlooked aspect is the design choices researchers make when building user interfaces (UIs). In this paper, four UIs used in past NLP human evaluations are assessed by UX experts against standardized human-centered interaction principles. Building on these insights, we derive several recommendations that the NLP community should apply when designing UIs, to enable more consistent human evaluation responses."
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="calo-etal-2025-lessons">
    <titleInfo>
      <title>Lessons from a User Experience Evaluation of NLP Interfaces</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Eduardo</namePart>
      <namePart type="family">Calò</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Lydia</namePart>
      <namePart type="family">Penkert</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Saad</namePart>
      <namePart type="family">Mahamood</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: NAACL 2025</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Luis</namePart>
        <namePart type="family">Chiruzzo</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alan</namePart>
        <namePart type="family">Ritter</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lu</namePart>
        <namePart type="family">Wang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Albuquerque, New Mexico</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-195-7</identifier>
    </relatedItem>
    <abstract>Human evaluations lie at the heart of evaluation within the field of Natural Language Processing (NLP). Although seen as the “gold standard” of evaluation, questions have been raised about whether these evaluations are both reproducible and repeatable. One overlooked aspect is the design choices researchers make when building user interfaces (UIs). In this paper, four UIs used in past NLP human evaluations are assessed by UX experts against standardized human-centered interaction principles. Building on these insights, we derive several recommendations that the NLP community should apply when designing UIs, to enable more consistent human evaluation responses.</abstract>
<identifier type="citekey">calo-etal-2025-lessons</identifier>
<identifier type="doi">10.18653/v1/2025.findings-naacl.159</identifier>
<location>
<url>https://aclanthology.org/2025.findings-naacl.159/</url>
</location>
<part>
<date>2025-04</date>
<extent unit="page">
<start>2915</start>
<end>2929</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Lessons from a User Experience Evaluation of NLP Interfaces
%A Calò, Eduardo
%A Penkert, Lydia
%A Mahamood, Saad
%Y Chiruzzo, Luis
%Y Ritter, Alan
%Y Wang, Lu
%S Findings of the Association for Computational Linguistics: NAACL 2025
%D 2025
%8 April
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-195-7
%F calo-etal-2025-lessons
%X Human evaluations lie at the heart of evaluation within the field of Natural Language Processing (NLP). Although seen as the “gold standard” of evaluation, questions have been raised about whether these evaluations are both reproducible and repeatable. One overlooked aspect is the design choices researchers make when building user interfaces (UIs). In this paper, four UIs used in past NLP human evaluations are assessed by UX experts against standardized human-centered interaction principles. Building on these insights, we derive several recommendations that the NLP community should apply when designing UIs, to enable more consistent human evaluation responses.
%R 10.18653/v1/2025.findings-naacl.159
%U https://aclanthology.org/2025.findings-naacl.159/
%U https://doi.org/10.18653/v1/2025.findings-naacl.159
%P 2915-2929
Markdown (Informal)
[Lessons from a User Experience Evaluation of NLP Interfaces](https://aclanthology.org/2025.findings-naacl.159/) (Calò et al., Findings 2025)
ACL
Eduardo Calò, Lydia Penkert, and Saad Mahamood. 2025. Lessons from a User Experience Evaluation of NLP Interfaces. In Findings of the Association for Computational Linguistics: NAACL 2025, pages 2915–2929, Albuquerque, New Mexico. Association for Computational Linguistics.