@inproceedings{bombardelli-etal-2026-multilingual,
title = "A Multilingual Voice Analytics Module for Contact-Center Hiring",
author = "Bombardelli, Wagner W. {\'A}vila and
Serrani, Vanessa Marquiafavel and
Kuboo, Edgard and
Miss{\~a}o, Erica C. Marins",
editor = "Souza, Marlo and
de-Dios-Flores, Iria and
Santos, Diana and
Freitas, Larissa and
Souza, Jackson Wilke da Cruz and
Ribeiro, Eug{\'e}nio",
booktitle = "Proceedings of the 17th International Conference on Computational Processing of {P}ortuguese ({PROPOR} 2026) - Vol. 1",
month = apr,
year = "2026",
address = "Salvador, Brazil",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.propor-1.6/",
pages = "57--66",
ISBN = "979-8-89176-387-6",
abstract = "Contact-center operations often face significant challenges in identifying candidates whose vocal performance aligns with high-quality customer interactions. Existing speech analytics tools typically assess only content, providing limited insight into how candidates speak. To address this gap, we introduce SR-Voice, a multilingual speech analytics module designed to support call-center hiring. SR-Voice extends a previous text-only auditor by integrating segment-level, audio-native analysis capable of generating judgments, concise evidence-based rationales, and 0{--}10 scores across three dimensions: Emotion, Communication, and Rhythm. Our two-stage architecture first applies an audio-native model to propose a label, which is then reassessed by a lightweight auditor that combines transcript cues with acoustic and timing indicators grounded in phonetic and prosodic theory. We evaluate SR-Voice on a production-like volunteer dataset, reporting strong agreement and calibration performance reaching Macro-F1 = 0.83; Expected Calibration Error (ECE) = 0.053. The hybrid system achieves state-of-the-art calibration without post-hoc adjustment, with the audio-only variant attaining the lowest Negative Log-Likelihood (NLL) = 0.472. Designed for operational practicality, SR-Voice emphasizes traceability, short rationales, and well-calibrated probabilities suitable for threshold-based decisions and human-in-the-loop triage. We also discuss privacy-preserving storage and the prospective masking of Personally Identifiable Information (PII) for archival data."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bombardelli-etal-2026-multilingual">
<titleInfo>
<title>A Multilingual Voice Analytics Module for Contact-Center Hiring</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wagner</namePart>
<namePart type="given">W</namePart>
<namePart type="given">Ávila</namePart>
<namePart type="family">Bombardelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vanessa</namePart>
<namePart type="given">Marquiafavel</namePart>
<namePart type="family">Serrani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Edgard</namePart>
<namePart type="family">Kuboo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Erica</namePart>
<namePart type="given">C</namePart>
<namePart type="given">Marins</namePart>
<namePart type="family">Missão</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marlo</namePart>
<namePart type="family">Souza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iria</namePart>
<namePart type="family">de-Dios-Flores</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diana</namePart>
<namePart type="family">Santos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Larissa</namePart>
<namePart type="family">Freitas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jackson</namePart>
<namePart type="given">Wilke</namePart>
<namePart type="given">da</namePart>
<namePart type="given">Cruz</namePart>
<namePart type="family">Souza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eugénio</namePart>
<namePart type="family">Ribeiro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Salvador, Brazil</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-387-6</identifier>
</relatedItem>
<abstract>Contact-center operations often face significant challenges in identifying candidates whose vocal performance aligns with high-quality customer interactions. Existing speech analytics tools typically assess only content, providing limited insight into how candidates speak. To address this gap, we introduce SR-Voice, a multilingual speech analytics module designed to support call-center hiring. SR-Voice extends a previous text-only auditor by integrating segment-level, audio-native analysis capable of generating judgments, concise evidence-based rationales, and 0–10 scores across three dimensions: Emotion, Communication, and Rhythm. Our two-stage architecture first applies an audio-native model to propose a label, which is then reassessed by a lightweight auditor that combines transcript cues with acoustic and timing indicators grounded in phonetic and prosodic theory. We evaluate SR-Voice on a production-like volunteer dataset, reporting strong agreement and calibration performance reaching Macro-F1 = 0.83; Expected Calibration Error (ECE) = 0.053. The hybrid system achieves state-of-the-art calibration without post-hoc adjustment, with the audio-only variant attaining the lowest Negative Log-Likelihood (NLL) = 0.472. Designed for operational practicality, SR-Voice emphasizes traceability, short rationales, and well-calibrated probabilities suitable for threshold-based decisions and human-in-the-loop triage. We also discuss privacy-preserving storage and the prospective masking of Personally Identifiable Information (PII) for archival data.</abstract>
<identifier type="citekey">bombardelli-etal-2026-multilingual</identifier>
<location>
<url>https://aclanthology.org/2026.propor-1.6/</url>
</location>
<part>
<date>2026-04</date>
<extent unit="page">
<start>57</start>
<end>66</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Multilingual Voice Analytics Module for Contact-Center Hiring
%A Bombardelli, Wagner W. Ávila
%A Serrani, Vanessa Marquiafavel
%A Kuboo, Edgard
%A Missão, Erica C. Marins
%Y Souza, Marlo
%Y de-Dios-Flores, Iria
%Y Santos, Diana
%Y Freitas, Larissa
%Y Souza, Jackson Wilke da Cruz
%Y Ribeiro, Eugénio
%S Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1
%D 2026
%8 April
%I Association for Computational Linguistics
%C Salvador, Brazil
%@ 979-8-89176-387-6
%F bombardelli-etal-2026-multilingual
%X Contact-center operations often face significant challenges in identifying candidates whose vocal performance aligns with high-quality customer interactions. Existing speech analytics tools typically assess only content, providing limited insight into how candidates speak. To address this gap, we introduce SR-Voice, a multilingual speech analytics module designed to support call-center hiring. SR-Voice extends a previous text-only auditor by integrating segment-level, audio-native analysis capable of generating judgments, concise evidence-based rationales, and 0–10 scores across three dimensions: Emotion, Communication, and Rhythm. Our two-stage architecture first applies an audio-native model to propose a label, which is then reassessed by a lightweight auditor that combines transcript cues with acoustic and timing indicators grounded in phonetic and prosodic theory. We evaluate SR-Voice on a production-like volunteer dataset, reporting strong agreement and calibration performance reaching Macro-F1 = 0.83; Expected Calibration Error (ECE) = 0.053. The hybrid system achieves state-of-the-art calibration without post-hoc adjustment, with the audio-only variant attaining the lowest Negative Log-Likelihood (NLL) = 0.472. Designed for operational practicality, SR-Voice emphasizes traceability, short rationales, and well-calibrated probabilities suitable for threshold-based decisions and human-in-the-loop triage. We also discuss privacy-preserving storage and the prospective masking of Personally Identifiable Information (PII) for archival data.
%U https://aclanthology.org/2026.propor-1.6/
%P 57-66
Markdown (Informal)
[A Multilingual Voice Analytics Module for Contact-Center Hiring](https://aclanthology.org/2026.propor-1.6/) (Bombardelli et al., PROPOR 2026)
ACL
- Wagner W. Ávila Bombardelli, Vanessa Marquiafavel Serrani, Edgard Kuboo, and Erica C. Marins Missão. 2026. A Multilingual Voice Analytics Module for Contact-Center Hiring. In Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1, pages 57–66, Salvador, Brazil. Association for Computational Linguistics.