% Conference paper in the Findings of ACL 2026 proceedings, pages 35921--35942.
% Values are brace-delimited; braces inside a value protect case or wrap LaTeX escapes.
@inproceedings{rao-etal-2026-empath,
  author    = {Rao, Dongning and
               Liang, Zhihua and
               Jiang, Zhihua},
  title     = {{EMPATH}: An Ensemble Method for Automatic Fine-Grained Turn-Level Dialogue Empathy Evaluation with a Novel Emotional Distance Metric},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  pages     = {35921--35942},
  isbn      = {979-8-89176-395-1},
  url       = {https://aclanthology.org/2026.findings-acl.1790/},
  abstract  = {Empathy is key to many professions. In recognition of this, the workshops on computational approaches to subjectivity, sentiment, and social media analysis (WASSA) hosted competitions to evaluate empathy in dialogue. While fine-tuning has proved successful in the competition, there are at least three shortcomings. First, novel metrics for empathy are absent. Second, classical dialogue evaluation metrics require further investigation. Third, the ensemble{'}s potential remained underdeveloped. To address these issues, we propose the EMPATH framework, which combines fine-tuned models, large language models, classical dialogue evaluation metrics, and a novel metric. The novel metric, ED, encourages the response{'}s emotional tone to be contextually appropriate. E.g., if the user expresses joy, a cheerful reaction should receive a higher ranking. Furthermore, we introduce a new robust and label-free ensemble strategy, HO, which integrates sub-metrics with the lowest correlation coefficient first. In addition to evaluating on the WASSA benchmark, we test EMPATH{'}s generalizability using the EmpatheticExchanges dataset (EX). Our experiment results demonstrate that EMPATH yields the best results on the competition dataset, and ablation studies validate our component selection. On EX, the Pearson correlation coefficient for the winner of WASSA 2024 is 0.4066, while EMPATH shows a statistically significant 8{\%} improvement (i.e., 0.4860).},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier further down. -->
  <mods ID="rao-etal-2026-empath">
    <titleInfo>
      <title>EMPATH: An Ensemble Method for Automatic Fine-Grained Turn-Level Dialogue Empathy Evaluation with a Novel Emotional Distance Metric</title>
    </titleInfo>

    <!-- Authors of the paper, in order. -->
    <name type="personal">
      <namePart type="given">Dongning</namePart>
      <namePart type="family">Rao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zhihua</namePart>
      <namePart type="family">Liang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zhihua</namePart>
      <namePart type="family">Jiang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2026</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Liakata</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Viviane</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">Moreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiajun</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-395-1</identifier>
    </relatedItem>

    <abstract>Empathy is key to many professions. In recognition of this, the workshops on computational approaches to subjectivity, sentiment, and social media analysis (WASSA) hosted competitions to evaluate empathy in dialogue. While fine-tuning has proved successful in the competition, there are at least three shortcomings. First, novel metrics for empathy are absent. Second, classical dialogue evaluation metrics require further investigation. Third, the ensemble’s potential remained underdeveloped. To address these issues, we propose the EMPATH framework, which combines fine-tuned models, large language models, classical dialogue evaluation metrics, and a novel metric. The novel metric, ED, encourages the response’s emotional tone to be contextually appropriate. E.g., if the user expresses joy, a cheerful reaction should receive a higher ranking. Furthermore, we introduce a new robust and label-free ensemble strategy, HO, which integrates sub-metrics with the lowest correlation coefficient first. In addition to evaluating on the WASSA benchmark, we test EMPATH’s generalizability using the EmpatheticExchanges dataset (EX). Our experiment results demonstrate that EMPATH yields the best results on the competition dataset, and ablation studies validate our component selection. On EX, the Pearson correlation coefficient for the winner of WASSA 2024 is 0.4066, while EMPATH shows a statistically significant 8% improvement (i.e., 0.4860).</abstract>
    <identifier type="citekey">rao-etal-2026-empath</identifier>
    <location>
      <url>https://aclanthology.org/2026.findings-acl.1790/</url>
    </location>

    <!-- Issue date and page range of this paper. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>35921</start>
        <end>35942</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T EMPATH: An Ensemble Method for Automatic Fine-Grained Turn-Level Dialogue Empathy Evaluation with a Novel Emotional Distance Metric
%A Rao, Dongning
%A Liang, Zhihua
%A Jiang, Zhihua
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F rao-etal-2026-empath
%X Empathy is key to many professions. In recognition of this, the workshops on computational approaches to subjectivity, sentiment, and social media analysis (WASSA) hosted competitions to evaluate empathy in dialogue. While fine-tuning has proved successful in the competition, there are at least three shortcomings. First, novel metrics for empathy are absent. Second, classical dialogue evaluation metrics require further investigation. Third, the ensemble’s potential remained underdeveloped. To address these issues, we propose the EMPATH framework, which combines fine-tuned models, large language models, classical dialogue evaluation metrics, and a novel metric. The novel metric, ED, encourages the response’s emotional tone to be contextually appropriate. E.g., if the user expresses joy, a cheerful reaction should receive a higher ranking. Furthermore, we introduce a new robust and label-free ensemble strategy, HO, which integrates sub-metrics with the lowest correlation coefficient first. In addition to evaluating on the WASSA benchmark, we test EMPATH’s generalizability using the EmpatheticExchanges dataset (EX). Our experiment results demonstrate that EMPATH yields the best results on the competition dataset, and ablation studies validate our component selection. On EX, the Pearson correlation coefficient for the winner of WASSA 2024 is 0.4066, while EMPATH shows a statistically significant 8% improvement (i.e., 0.4860).
%U https://aclanthology.org/2026.findings-acl.1790/
%P 35921-35942
Markdown (Informal)
[EMPATH: An Ensemble Method for Automatic Fine-Grained Turn-Level Dialogue Empathy Evaluation with a Novel Emotional Distance Metric](https://aclanthology.org/2026.findings-acl.1790/) (Rao et al., Findings 2026)
ACL