@inproceedings{wang-li-2023-text,
title = "Text-Derived Language Identity Incorporation for End-to-End Code-Switching Speech Recognition",
author = "Wang, Qinyi and
Li, Haizhou",
editor = "Winata, Genta and
Kar, Sudipta and
Zhukova, Marina and
Solorio, Thamar and
Diab, Mona and
Sitaram, Sunayana and
Choudhury, Monojit and
Bali, Kalika",
booktitle = "Proceedings of the 6th Workshop on Computational Approaches to Linguistic Code-Switching",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.calcs-1.4/",
pages = "33--42",
abstract = "Recognizing code-switching (CS) speech often presents challenges for an automatic speech recognition system (ASR) due to limited linguistic context in short monolingual segments, resulting in language confusion. To mitigate this issue, language identity (LID) is often integrated into the speech recognition system to provide additional linguistic context. However, previous works predominately focus on extracting language identity from speech signals. We introduce a novel approach to learn language identity from pure text data via a dedicated language identity-language model. Besides, we explore two strategies: LID state fusion and language posterior biasing, to integrate the text-derived language identities into the end-to-end ASR system. By incorporating hypothesized language identities, our ASR system gains crucial contextual cues, effectively capturing language transitions and patterns within code-switched utterances. We conduct speech recognition experiments on the SEAME corpus and demonstrate the effectiveness of our proposed methods. Our results reveal significantly improved transcriptions in code-switching scenarios, underscoring the potential of text-derived LID in enhancing code-switching speech recognition."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wang-li-2023-text">
<titleInfo>
<title>Text-Derived Language Identity Incorporation for End-to-End Code-Switching Speech Recognition</title>
</titleInfo>
<name type="personal">
<namePart type="given">Qinyi</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haizhou</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 6th Workshop on Computational Approaches to Linguistic Code-Switching</title>
</titleInfo>
<name type="personal">
<namePart type="given">Genta</namePart>
<namePart type="family">Winata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sudipta</namePart>
<namePart type="family">Kar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marina</namePart>
<namePart type="family">Zhukova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thamar</namePart>
<namePart type="family">Solorio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mona</namePart>
<namePart type="family">Diab</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sunayana</namePart>
<namePart type="family">Sitaram</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Monojit</namePart>
<namePart type="family">Choudhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recognizing code-switching (CS) speech often presents challenges for an automatic speech recognition system (ASR) due to limited linguistic context in short monolingual segments, resulting in language confusion. To mitigate this issue, language identity (LID) is often integrated into the speech recognition system to provide additional linguistic context. However, previous works predominately focus on extracting language identity from speech signals. We introduce a novel approach to learn language identity from pure text data via a dedicated language identity-language model. Besides, we explore two strategies: LID state fusion and language posterior biasing, to integrate the text-derived language identities into the end-to-end ASR system. By incorporating hypothesized language identities, our ASR system gains crucial contextual cues, effectively capturing language transitions and patterns within code-switched utterances. We conduct speech recognition experiments on the SEAME corpus and demonstrate the effectiveness of our proposed methods. Our results reveal significantly improved transcriptions in code-switching scenarios, underscoring the potential of text-derived LID in enhancing code-switching speech recognition.</abstract>
<identifier type="citekey">wang-li-2023-text</identifier>
<location>
<url>https://aclanthology.org/2023.calcs-1.4/</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>33</start>
<end>42</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Text-Derived Language Identity Incorporation for End-to-End Code-Switching Speech Recognition
%A Wang, Qinyi
%A Li, Haizhou
%Y Winata, Genta
%Y Kar, Sudipta
%Y Zhukova, Marina
%Y Solorio, Thamar
%Y Diab, Mona
%Y Sitaram, Sunayana
%Y Choudhury, Monojit
%Y Bali, Kalika
%S Proceedings of the 6th Workshop on Computational Approaches to Linguistic Code-Switching
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F wang-li-2023-text
%X Recognizing code-switching (CS) speech often presents challenges for an automatic speech recognition system (ASR) due to limited linguistic context in short monolingual segments, resulting in language confusion. To mitigate this issue, language identity (LID) is often integrated into the speech recognition system to provide additional linguistic context. However, previous works predominately focus on extracting language identity from speech signals. We introduce a novel approach to learn language identity from pure text data via a dedicated language identity-language model. Besides, we explore two strategies: LID state fusion and language posterior biasing, to integrate the text-derived language identities into the end-to-end ASR system. By incorporating hypothesized language identities, our ASR system gains crucial contextual cues, effectively capturing language transitions and patterns within code-switched utterances. We conduct speech recognition experiments on the SEAME corpus and demonstrate the effectiveness of our proposed methods. Our results reveal significantly improved transcriptions in code-switching scenarios, underscoring the potential of text-derived LID in enhancing code-switching speech recognition.
%U https://aclanthology.org/2023.calcs-1.4/
%P 33-42
Markdown (Informal)
[Text-Derived Language Identity Incorporation for End-to-End Code-Switching Speech Recognition](https://aclanthology.org/2023.calcs-1.4/) (Wang & Li, CALCS 2023)
ACL