% Conference paper record for "C-SHAP" (ACL Anthology id 2025.ranlp-1.82).
% The entry key is the same citekey used by the MODS and EndNote records further down.
% NOTE(review): address appears to hold the conference venue, while the publisher's
% city is embedded in the publisher string; confirm this is the intended convention.
@inproceedings{menzio-etal-2025-c,
  title     = {{C-SHAP}: Collocation-Aware Explanations for Financial {NLP}},
  author    = {Menzio, Martina and
               Fersini, Elisabetta and
               Paris, Davide},
  editor    = {Angelova, Galia and
               Kunilovskaya, Maria and
               Escribe, Marie and
               Mitkov, Ruslan},
  booktitle = {Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative {AI} Era},
  month     = sep,
  year      = {2025},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd., Shoumen, Bulgaria},
  url       = {https://aclanthology.org/2025.ranlp-1.82/},
  pages     = {711--717},
  abstract  = {Understanding the internal decision-making process of NLP models in high-stakes domains such as the financial sector is particularly challenging due to the complexity of domain-specific terminology and the need for transparency and accountability. Although SHAP is a widely used model-agnostic method for attributing model predictions to input features, its standard formulation treats input tokens as independent units, failing to capture the influence of collocations that often carry non-compositional meaning, instead modeled by the current language models. We introduce C-SHAP, an extension of SHAP that incorporates collocational dependencies into the explanation process to account for word combinations in the financial sector. C-SHAP dynamically groups tokens into significant collocations using a financial glossary and computes Shapley values over these structured units. The proposed approach has been evaluated to explain sentiment classification of Federal Reserve Minutes, demonstrating improved alignment with human rationales and better association to model behaviour compared to the standard token-level approach.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID equals the citekey identifier given near the end. -->
  <mods ID="menzio-etal-2025-c">

    <!-- Title of the paper -->
    <titleInfo>
      <title>C-SHAP: Collocation-Aware Explanations for Financial NLP</title>
    </titleInfo>

    <!-- Authors, in citation order -->
    <name type="personal">
      <namePart type="given">Martina</namePart>
      <namePart type="family">Menzio</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Elisabetta</namePart>
      <namePart type="family">Fersini</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Davide</namePart>
      <namePart type="family">Paris</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <!-- Issue date of the paper (year and month) -->
    <originInfo>
      <dateIssued>2025-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume containing the paper -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era</title>
      </titleInfo>

      <!-- Editors of the proceedings, in citation order -->
      <name type="personal">
        <namePart type="given">Galia</namePart>
        <namePart type="family">Angelova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Kunilovskaya</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marie</namePart>
        <namePart type="family">Escribe</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>

      <!-- Publisher and place recorded for the proceedings -->
      <originInfo>
        <publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <!-- Abstract of the paper -->
    <abstract>Understanding the internal decision-making process of NLP models in high-stakes domains such as the financial sector is particularly challenging due to the complexity of domain-specific terminology and the need for transparency and accountability. Although SHAP is a widely used model-agnostic method for attributing model predictions to input features, its standard formulation treats input tokens as independent units, failing to capture the influence of collocations that often carry non-compositional meaning, instead modeled by the current language models. We introduce C-SHAP, an extension of SHAP that incorporates collocational dependencies into the explanation process to account for word combinations in the financial sector. C-SHAP dynamically groups tokens into significant collocations using a financial glossary and computes Shapley values over these structured units. The proposed approach has been evaluated to explain sentiment classification of Federal Reserve Minutes, demonstrating improved alignment with human rationales and better association to model behaviour compared to the standard token-level approach.</abstract>

    <!-- Citation key and landing-page URL -->
    <identifier type="citekey">menzio-etal-2025-c</identifier>
    <location>
      <url>https://aclanthology.org/2025.ranlp-1.82/</url>
    </location>

    <!-- Position within the host: date and page range -->
    <part>
      <date>2025-09</date>
      <extent unit="page">
        <start>711</start>
        <end>717</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%F menzio-etal-2025-c
%T C-SHAP: Collocation-Aware Explanations for Financial NLP
%A Menzio, Martina
%A Fersini, Elisabetta
%A Paris, Davide
%Y Angelova, Galia
%Y Kunilovskaya, Maria
%Y Escribe, Marie
%Y Mitkov, Ruslan
%S Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%D 2025
%8 September
%P 711-717
%U https://aclanthology.org/2025.ranlp-1.82/
%X Understanding the internal decision-making process of NLP models in high-stakes domains such as the financial sector is particularly challenging due to the complexity of domain-specific terminology and the need for transparency and accountability. Although SHAP is a widely used model-agnostic method for attributing model predictions to input features, its standard formulation treats input tokens as independent units, failing to capture the influence of collocations that often carry non-compositional meaning, instead modeled by the current language models. We introduce C-SHAP, an extension of SHAP that incorporates collocational dependencies into the explanation process to account for word combinations in the financial sector. C-SHAP dynamically groups tokens into significant collocations using a financial glossary and computes Shapley values over these structured units. The proposed approach has been evaluated to explain sentiment classification of Federal Reserve Minutes, demonstrating improved alignment with human rationales and better association to model behaviour compared to the standard token-level approach.
Markdown (Informal)
[C-SHAP: Collocation-Aware Explanations for Financial NLP](https://aclanthology.org/2025.ranlp-1.82/) (Menzio et al., RANLP 2025)
ACL
- Martina Menzio, Elisabetta Fersini, and Davide Paris. 2025. C-SHAP: Collocation-Aware Explanations for Financial NLP. In Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era, pages 711–717, Varna, Bulgaria. INCOMA Ltd., Shoumen, Bulgaria.