@article{reiter-2025-evaluate,
title = "We Should Evaluate Real-World Impact",
author = "Reiter, Ehud",
journal = "Computational Linguistics",
volume = "51",
number = "4",
month = dec,
year = "2025",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2025.cl-4.10/",
doi = "10.1162/coli.a.18",
pages = "1419--1431",
abstract = "The ACL community has very little interest in evaluating the real-world impact of NLP systems. A structured survey of the ACL Anthology shows that perhaps 0.1{\%} of its papers contain such evaluations; furthermore most papers that include impact evaluations present them very sketchily and instead focus on metric evaluations. NLP technology would be more useful and more quickly adopted if we seriously tried to understand and evaluate its real-world impact."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="reiter-2025-evaluate">
<titleInfo>
<title>We Should Evaluate Real-World Impact</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ehud</namePart>
<namePart type="family">Reiter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>The ACL community has very little interest in evaluating the real-world impact of NLP systems. A structured survey of the ACL Anthology shows that perhaps 0.1% of its papers contain such evaluations; furthermore most papers that include impact evaluations present them very sketchily and instead focus on metric evaluations. NLP technology would be more useful and more quickly adopted if we seriously tried to understand and evaluate its real-world impact.</abstract>
<identifier type="citekey">reiter-2025-evaluate</identifier>
<identifier type="doi">10.1162/coli.a.18</identifier>
<location>
<url>https://aclanthology.org/2025.cl-4.10/</url>
</location>
<part>
<date>2025-12</date>
<detail type="volume"><number>51</number></detail>
<detail type="issue"><number>4</number></detail>
<extent unit="page">
<start>1419</start>
<end>1431</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T We Should Evaluate Real-World Impact
%A Reiter, Ehud
%J Computational Linguistics
%D 2025
%8 December
%V 51
%N 4
%I MIT Press
%C Cambridge, MA
%F reiter-2025-evaluate
%X The ACL community has very little interest in evaluating the real-world impact of NLP systems. A structured survey of the ACL Anthology shows that perhaps 0.1% of its papers contain such evaluations; furthermore most papers that include impact evaluations present them very sketchily and instead focus on metric evaluations. NLP technology would be more useful and more quickly adopted if we seriously tried to understand and evaluate its real-world impact.
%R 10.1162/coli.a.18
%U https://aclanthology.org/2025.cl-4.10/
%U https://doi.org/10.1162/coli.a.18
%P 1419-1431
Markdown (Informal)
[We Should Evaluate Real-World Impact](https://aclanthology.org/2025.cl-4.10/) (Reiter, CL 2025)
ACL