@inproceedings{carrick-etal-2025-speech,
title = "Speech-Controlled Smart Speaker for Accurate, Real-Time Health and Care Record Management",
author = "Carrick, Jonathan E. and
Dethlefs, Nina and
Greaves, Lisa and
Gunturi, Venkata M. V. and
Kureshi, Rameez Raja and
Cheng, Yongqiang",
editor = "Torres, Maria Ines and
Matsuda, Yuki and
Callejas, Zoraida and
del Pozo, Arantza and
D'Haro, Luis Fernando",
booktitle = "Proceedings of the 15th International Workshop on Spoken Dialogue Systems Technology",
month = may,
year = "2025",
address = "Bilbao, Spain",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.iwsds-1.25/",
pages = "238--244",
ISBN = "979-8-89176-248-0",
abstract = "To help alleviate the pressures felt by care workers, we have begun new research into improving the efficiency of care plan management by advancing recent developments in automatic speech recognition. Our novel approach adapts off-the-shelf tools in a purpose-built application for the speech domain, addressing challenges of accent adaption, real-time processing and speech hallucinations. We augment the speech-recognition scope of Open AI{'}s Whisper model through fine-tuning, reducing word error rates (WERs) from 16.8 to 1.0 on a range of British dialects. Addressing the speech-hallucination side effect of adapting to real-time recognition by enforcing a signal-to-noise ratio threshold and audio stream checks, we achieve a WER of 5.1, compared to 14.9 with Whisper{'}s original model. These ongoing research efforts tackle challenges that are necessary to build the speech-control basis for a custom smart speaker system that is both accurate and timely."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="carrick-etal-2025-speech">
<titleInfo>
<title>Speech-Controlled Smart Speaker for Accurate, Real-Time Health and Care Record Management</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="given">E</namePart>
<namePart type="family">Carrick</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nina</namePart>
<namePart type="family">Dethlefs</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lisa</namePart>
<namePart type="family">Greaves</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Venkata</namePart>
<namePart type="given">M</namePart>
<namePart type="given">V</namePart>
<namePart type="family">Gunturi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rameez</namePart>
<namePart type="given">Raja</namePart>
<namePart type="family">Kureshi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yongqiang</namePart>
<namePart type="family">Cheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th International Workshop on Spoken Dialogue Systems Technology</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="given">Ines</namePart>
<namePart type="family">Torres</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuki</namePart>
<namePart type="family">Matsuda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zoraida</namePart>
<namePart type="family">Callejas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arantza</namePart>
<namePart type="family">del Pozo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luis</namePart>
<namePart type="given">Fernando</namePart>
<namePart type="family">D’Haro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bilbao, Spain</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-248-0</identifier>
</relatedItem>
<abstract>To help alleviate the pressures felt by care workers, we have begun new research into improving the efficiency of care plan management by advancing recent developments in automatic speech recognition. Our novel approach adapts off-the-shelf tools in a purpose-built application for the speech domain, addressing challenges of accent adaption, real-time processing and speech hallucinations. We augment the speech-recognition scope of Open AI’s Whisper model through fine-tuning, reducing word error rates (WERs) from 16.8 to 1.0 on a range of British dialects. Addressing the speech-hallucination side effect of adapting to real-time recognition by enforcing a signal-to-noise ratio threshold and audio stream checks, we achieve a WER of 5.1, compared to 14.9 with Whisper’s original model. These ongoing research efforts tackle challenges that are necessary to build the speech-control basis for a custom smart speaker system that is both accurate and timely.</abstract>
<identifier type="citekey">carrick-etal-2025-speech</identifier>
<location>
<url>https://aclanthology.org/2025.iwsds-1.25/</url>
</location>
<part>
<date>2025-05</date>
<extent unit="page">
<start>238</start>
<end>244</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Speech-Controlled Smart Speaker for Accurate, Real-Time Health and Care Record Management
%A Carrick, Jonathan E.
%A Dethlefs, Nina
%A Greaves, Lisa
%A Gunturi, Venkata M. V.
%A Kureshi, Rameez Raja
%A Cheng, Yongqiang
%Y Torres, Maria Ines
%Y Matsuda, Yuki
%Y Callejas, Zoraida
%Y del Pozo, Arantza
%Y D’Haro, Luis Fernando
%S Proceedings of the 15th International Workshop on Spoken Dialogue Systems Technology
%D 2025
%8 May
%I Association for Computational Linguistics
%C Bilbao, Spain
%@ 979-8-89176-248-0
%F carrick-etal-2025-speech
%X To help alleviate the pressures felt by care workers, we have begun new research into improving the efficiency of care plan management by advancing recent developments in automatic speech recognition. Our novel approach adapts off-the-shelf tools in a purpose-built application for the speech domain, addressing challenges of accent adaptation, real-time processing and speech hallucinations. We augment the speech-recognition scope of OpenAI’s Whisper model through fine-tuning, reducing word error rates (WERs) from 16.8 to 1.0 on a range of British dialects. Addressing the speech-hallucination side effect of adapting to real-time recognition by enforcing a signal-to-noise ratio threshold and audio stream checks, we achieve a WER of 5.1, compared to 14.9 with Whisper’s original model. These ongoing research efforts tackle challenges that are necessary to build the speech-control basis for a custom smart speaker system that is both accurate and timely.
%U https://aclanthology.org/2025.iwsds-1.25/
%P 238-244
Markdown (Informal)
[Speech-Controlled Smart Speaker for Accurate, Real-Time Health and Care Record Management](https://aclanthology.org/2025.iwsds-1.25/) (Carrick et al., IWSDS 2025)
ACL
Jonathan E. Carrick, Nina Dethlefs, Lisa Greaves, Venkata M. V. Gunturi, Rameez Raja Kureshi, and Yongqiang Cheng. 2025. Speech-Controlled Smart Speaker for Accurate, Real-Time Health and Care Record Management. In Proceedings of the 15th International Workshop on Spoken Dialogue Systems Technology, pages 238–244, Bilbao, Spain. Association for Computational Linguistics.