% BibTeX record for the conference paper cited as chan-etal-2026-word
% (Proceedings of the Society for Computation in Linguistics 2026, pages 230--243).
% Proper nouns in the title are brace-protected as whole words so that
% sentence-casing bibliography styles keep their capitals.
@inproceedings{chan-etal-2026-word,
    title = "Word Predictability on Code-switching Points in {Cantonese}--{English} Discourse",
    author = "Chan, Ariel Shuk Ling and
      Li, Yanting and
      Poschl, Jacob",
    editor = "Voigt, Rob and
      Warstadt, Alex and
      Feldman, Naomi and
      Linzen, Tal",
    booktitle = "Proceedings of the Society for Computation in Linguistics 2026",
    month = jul,
    year = "2026",
    address = "San Diego, CA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2026.scil-main.21/",
    pages = "230--243",
    isbn = "979-8-89176-412-5",
    abstract = "This paper investigates how word predictability influences code-switching probability. We analyze 1,010 code-switched instances drawn from naturalistic sociolinguistic interviews with 41 Cantonese{--}English bilinguals across three bilingual groups (homeland, immersed, and heritage). In particular, we examine whether the predictability of switch points, operationalized as surprisal, influences the likelihood of code-switching. Using pretrained transformer-based language models, we estimate surprisal at the switch point under different modeling conditions, including autoregressive and masked models and varying amounts of contextual information. Mixed-effects logistic regression analyses show that less predictable words are more likely to be code-switched. These effects are largely consistent across model types and bilingual groups. Overall, these findings highlight the role of predictability in bilingual speech production and provide new insights into code-switching among bilingual speakers with diverse language experiences."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chan-etal-2026-word">
<titleInfo>
<title>Word Predictability on Code-switching Points in Cantonese–English Discourse</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ariel</namePart>
<namePart type="given">Shuk</namePart>
<namePart type="given">Ling</namePart>
<namePart type="family">Chan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yanting</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jacob</namePart>
<namePart type="family">Poschl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Society for Computation in Linguistics 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rob</namePart>
<namePart type="family">Voigt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alex</namePart>
<namePart type="family">Warstadt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naomi</namePart>
<namePart type="family">Feldman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tal</namePart>
<namePart type="family">Linzen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, CA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-412-5</identifier>
</relatedItem>
<abstract>This paper investigates how word predictability influences code-switching probability. We analyze 1,010 code-switched instances drawn from naturalistic sociolinguistic interviews with 41 Cantonese–English bilinguals across three bilingual groups (homeland, immersed, and heritage). In particular, we examine whether the predictability of switch points, operationalized as surprisal, influences the likelihood of code-switching. Using pretrained transformer-based language models, we estimate surprisal at the switch point under different modeling conditions, including autoregressive and masked models and varying amounts of contextual information. Mixed-effects logistic regression analyses show that less predictable words are more likely to be code-switched. These effects are largely consistent across model types and bilingual groups. Overall, these findings highlight the role of predictability in bilingual speech production and provide new insights into code-switching among bilingual speakers with diverse language experiences.</abstract>
<identifier type="citekey">chan-etal-2026-word</identifier>
<location>
<url>https://aclanthology.org/2026.scil-main.21/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>230</start>
<end>243</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Word Predictability on Code-switching Points in Cantonese–English Discourse
%A Chan, Ariel Shuk Ling
%A Li, Yanting
%A Poschl, Jacob
%Y Voigt, Rob
%Y Warstadt, Alex
%Y Feldman, Naomi
%Y Linzen, Tal
%S Proceedings of the Society for Computation in Linguistics 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, CA
%@ 979-8-89176-412-5
%F chan-etal-2026-word
%X This paper investigates how word predictability influences code-switching probability. We analyze 1,010 code-switched instances drawn from naturalistic sociolinguistic interviews with 41 Cantonese–English bilinguals across three bilingual groups (homeland, immersed, and heritage). In particular, we examine whether the predictability of switch points, operationalized as surprisal, influences the likelihood of code-switching. Using pretrained transformer-based language models, we estimate surprisal at the switch point under different modeling conditions, including autoregressive and masked models and varying amounts of contextual information. Mixed-effects logistic regression analyses show that less predictable words are more likely to be code-switched. These effects are largely consistent across model types and bilingual groups. Overall, these findings highlight the role of predictability in bilingual speech production and provide new insights into code-switching among bilingual speakers with diverse language experiences.
%U https://aclanthology.org/2026.scil-main.21/
%P 230-243
Markdown (Informal)
[Word Predictability on Code-switching Points in Cantonese–English Discourse](https://aclanthology.org/2026.scil-main.21/) (Chan et al., SCiL 2026)
ACL