% BibTeX record for the TrustNLP 2025 workshop paper on action-score difficulty estimation.
% Text outside an entry is ignored by BibTeX, so these notes are safe here.
% - Values are brace-delimited; acronyms in booktitle are brace-protected against recasing.
% - The address field holds the conference location as given by the original export.
% - NOTE(review): editor surname changed from "Galystan" to "Galstyan" (suspected
%   transposition in the upstream export) -- confirm against the proceedings front matter.
@inproceedings{angelov-etal-2025-difficulty,
  title     = {Difficulty Estimation in Natural Language Tasks with Action Scores},
  author    = {Angelov, Aleksandar and
               Tashu, Tsegaye Misikir and
               Valdenegro-Toro, Matias},
  editor    = {Cao, Trista and
               Das, Anubrata and
               Kumarage, Tharindu and
               Wan, Yixin and
               Krishna, Satyapriya and
               Mehrabi, Ninareh and
               Dhamala, Jwala and
               Ramakrishna, Anil and
               Galstyan, Aram and
               Kumar, Anoop and
               Gupta, Rahul and
               Chang, Kai-Wei},
  booktitle = {Proceedings of the 5th Workshop on Trustworthy {NLP} ({TrustNLP} 2025)},
  month     = may,
  year      = {2025},
  address   = {Albuquerque, New Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.trustnlp-main.24/},
  doi       = {10.18653/v1/2025.trustnlp-main.24},
  pages     = {351--364},
  isbn      = {979-8-89176-233-6},
  abstract  = {This study investigates the effectiveness of the action score, a metric originally developed for computer vision tasks, in estimating sample difficulty across various natural language processing (NLP) tasks. Using transformer-based models, the action score is applied to sentiment analysis, natural language inference, and abstractive text summarization. The results demonstrate that the action score can effectively identify challenging samples in sentiment analysis and natural language inference, often capturing difficult instances that are missed by more established metrics like entropy. However, the effectiveness of the action score appears to be task-dependent, as evidenced by its performance in the abstractive text summarization task, where it exhibits a nearly linear relationship with entropy. The findings suggest that the action score can provide valuable insights into the characteristics of challenging samples in NLP tasks, particularly in classification settings. However, its application should be carefully considered in the context of each specific task and in light of emerging research on the potential value of hard samples in machine learning.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="angelov-etal-2025-difficulty">
<titleInfo>
<title>Difficulty Estimation in Natural Language Tasks with Action Scores</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aleksandar</namePart>
<namePart type="family">Angelov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tsegaye</namePart>
<namePart type="given">Misikir</namePart>
<namePart type="family">Tashu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matias</namePart>
<namePart type="family">Valdenegro-Toro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 5th Workshop on Trustworthy NLP (TrustNLP 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Trista</namePart>
<namePart type="family">Cao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anubrata</namePart>
<namePart type="family">Das</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tharindu</namePart>
<namePart type="family">Kumarage</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yixin</namePart>
<namePart type="family">Wan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Satyapriya</namePart>
<namePart type="family">Krishna</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ninareh</namePart>
<namePart type="family">Mehrabi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jwala</namePart>
<namePart type="family">Dhamala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anil</namePart>
<namePart type="family">Ramakrishna</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aram</namePart>
<namePart type="family">Galstyan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anoop</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rahul</namePart>
<namePart type="family">Gupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kai-Wei</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-233-6</identifier>
</relatedItem>
<abstract>This study investigates the effectiveness of the action score, a metric originally developed for computer vision tasks, in estimating sample difficulty across various natural language processing (NLP) tasks. Using transformer-based models, the action score is applied to sentiment analysis, natural language inference, and abstractive text summarization. The results demonstrate that the action score can effectively identify challenging samples in sentiment analysis and natural language inference, often capturing difficult instances that are missed by more established metrics like entropy. However, the effectiveness of the action score appears to be task-dependent, as evidenced by its performance in the abstractive text summarization task, where it exhibits a nearly linear relationship with entropy. The findings suggest that the action score can provide valuable insights into the characteristics of challenging samples in NLP tasks, particularly in classification settings. However, its application should be carefully considered in the context of each specific task and in light of emerging research on the potential value of hard samples in machine learning.</abstract>
<identifier type="citekey">angelov-etal-2025-difficulty</identifier>
<identifier type="doi">10.18653/v1/2025.trustnlp-main.24</identifier>
<location>
<url>https://aclanthology.org/2025.trustnlp-main.24/</url>
</location>
<part>
<date>2025-05</date>
<extent unit="page">
<start>351</start>
<end>364</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Difficulty Estimation in Natural Language Tasks with Action Scores
%A Angelov, Aleksandar
%A Tashu, Tsegaye Misikir
%A Valdenegro-Toro, Matias
%Y Cao, Trista
%Y Das, Anubrata
%Y Kumarage, Tharindu
%Y Wan, Yixin
%Y Krishna, Satyapriya
%Y Mehrabi, Ninareh
%Y Dhamala, Jwala
%Y Ramakrishna, Anil
%Y Galstyan, Aram
%Y Kumar, Anoop
%Y Gupta, Rahul
%Y Chang, Kai-Wei
%S Proceedings of the 5th Workshop on Trustworthy NLP (TrustNLP 2025)
%D 2025
%8 May
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-233-6
%F angelov-etal-2025-difficulty
%X This study investigates the effectiveness of the action score, a metric originally developed for computer vision tasks, in estimating sample difficulty across various natural language processing (NLP) tasks. Using transformer-based models, the action score is applied to sentiment analysis, natural language inference, and abstractive text summarization. The results demonstrate that the action score can effectively identify challenging samples in sentiment analysis and natural language inference, often capturing difficult instances that are missed by more established metrics like entropy. However, the effectiveness of the action score appears to be task-dependent, as evidenced by its performance in the abstractive text summarization task, where it exhibits a nearly linear relationship with entropy. The findings suggest that the action score can provide valuable insights into the characteristics of challenging samples in NLP tasks, particularly in classification settings. However, its application should be carefully considered in the context of each specific task and in light of emerging research on the potential value of hard samples in machine learning.
%R 10.18653/v1/2025.trustnlp-main.24
%U https://aclanthology.org/2025.trustnlp-main.24/
%U https://doi.org/10.18653/v1/2025.trustnlp-main.24
%P 351-364
Markdown (Informal)
[Difficulty Estimation in Natural Language Tasks with Action Scores](https://aclanthology.org/2025.trustnlp-main.24/) (Angelov et al., TrustNLP 2025)
ACL