@comment{Conference-paper record for ACL Anthology item 2026.vardial-1.7. Text that follows the entry outside any entry braces, such as the XML declaration below, is ignored by BibTeX.}
@inproceedings{aksut-etal-2026-german,
    title = "{German}-{English} Code-Switching in Large Language Models",
    author = {Aks{\"u}t, Firat Cem and
      Hillmann, Stefan and
      Knoeferle, Pia and
      M{\"o}ller, Sebastian},
    editor = {Scherrer, Yves and
      Aepli, No{\"e}mi and
      Blaschke, Verena and
      Jauhiainen, Tommi and
      Ljube{\v{s}}i{\'c}, Nikola and
      Nakov, Preslav and
      Tiedemann, J{\"o}rg and
      Zampieri, Marcos},
    booktitle = "Proceedings of the 13th Workshop on {NLP} for Similar Languages, Varieties and Dialects",
    month = mar,
    year = "2026",
    address = "Rabat, Morocco",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2026.vardial-1.7/",
    doi = "10.18653/v1/2026.vardial-1.7",
    pages = "87--100",
    abstract = "Code-Switching (CS) is common in multilingual communication, yet it is unclear how well current Large Language Models (LLMs) reproduce naturally occurring switching patterns. This paper studies German{--}English CS (``Denglisch'') generated by GPT-4o and LLaMA-3.3, using Reddit data from the Denglisch Corpus as a reference. Model outputs are compared to authentic posts using established CS metrics (M-Index, I-Index, CESAR), an analysis of Shared Lexical Items (SLIs) as switch triggers, and a human evaluation of perceived naturalness and fluency. Both models approximate global CS characteristics but differ in the diversity and complexity in comparison to real data. LLaMA-3.3 more closely matches corpus-level metrics, whereas GPT-4o produces more conservative switching that is rated as significantly more natural and fluent. In addition, GPT-4o reproduces SLI-triggered switching patterns similar to those found in authentic data, while this effect is weaker for LLaMA-3.3."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="aksut-etal-2026-german">
<titleInfo>
<title>German-English Code-Switching in Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Firat</namePart>
<namePart type="given">Cem</namePart>
<namePart type="family">Aksüt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stefan</namePart>
<namePart type="family">Hillmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pia</namePart>
<namePart type="family">Knoeferle</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Möller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 13th Workshop on NLP for Similar Languages, Varieties and Dialects</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yves</namePart>
<namePart type="family">Scherrer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Noëmi</namePart>
<namePart type="family">Aepli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Verena</namePart>
<namePart type="family">Blaschke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tommi</namePart>
<namePart type="family">Jauhiainen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikola</namePart>
<namePart type="family">Ljubešić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Code-Switching (CS) is common in multilingual communication, yet it is unclear how well current Large Language Models (LLMs) reproduce naturally occurring switching patterns. This paper studies German–English CS (“Denglisch”) generated by GPT-4o and LLaMA-3.3, using Reddit data from the Denglisch Corpus as a reference. Model outputs are compared to authentic posts using established CS metrics (M-Index, I-Index, CESAR), an analysis of Shared Lexical Items (SLIs) as switch triggers, and a human evaluation of perceived naturalness and fluency. Both models approximate global CS characteristics but differ in the diversity and complexity in comparison to real data. LLaMA-3.3 more closely matches corpus-level metrics, whereas GPT-4o produces more conservative switching that is rated as significantly more natural and fluent. In addition, GPT-4o reproduces SLI-triggered switching patterns similar to those found in authentic data, while this effect is weaker for LLaMA-3.3.</abstract>
<identifier type="citekey">aksut-etal-2026-german</identifier>
<identifier type="doi">10.18653/v1/2026.vardial-1.7</identifier>
<location>
<url>https://aclanthology.org/2026.vardial-1.7/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>87</start>
<end>100</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T German-English Code-Switching in Large Language Models
%A Aksüt, Firat Cem
%A Hillmann, Stefan
%A Knoeferle, Pia
%A Möller, Sebastian
%Y Scherrer, Yves
%Y Aepli, Noëmi
%Y Blaschke, Verena
%Y Jauhiainen, Tommi
%Y Ljubešić, Nikola
%Y Nakov, Preslav
%Y Tiedemann, Jörg
%Y Zampieri, Marcos
%S Proceedings of the 13th Workshop on NLP for Similar Languages, Varieties and Dialects
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%F aksut-etal-2026-german
%X Code-Switching (CS) is common in multilingual communication, yet it is unclear how well current Large Language Models (LLMs) reproduce naturally occurring switching patterns. This paper studies German–English CS (“Denglisch”) generated by GPT-4o and LLaMA-3.3, using Reddit data from the Denglisch Corpus as a reference. Model outputs are compared to authentic posts using established CS metrics (M-Index, I-Index, CESAR), an analysis of Shared Lexical Items (SLIs) as switch triggers, and a human evaluation of perceived naturalness and fluency. Both models approximate global CS characteristics but differ in the diversity and complexity in comparison to real data. LLaMA-3.3 more closely matches corpus-level metrics, whereas GPT-4o produces more conservative switching that is rated as significantly more natural and fluent. In addition, GPT-4o reproduces SLI-triggered switching patterns similar to those found in authentic data, while this effect is weaker for LLaMA-3.3.
%R 10.18653/v1/2026.vardial-1.7
%U https://aclanthology.org/2026.vardial-1.7/
%U https://doi.org/10.18653/v1/2026.vardial-1.7
%P 87-100
Markdown (Informal)
[German-English Code-Switching in Large Language Models](https://aclanthology.org/2026.vardial-1.7/) (Aksüt et al., VarDial 2026)
ACL
- Firat Cem Aksüt, Stefan Hillmann, Pia Knoeferle, and Sebastian Möller. 2026. German-English Code-Switching in Large Language Models. In Proceedings of the 13th Workshop on NLP for Similar Languages, Varieties and Dialects, pages 87–100, Rabat, Morocco. Association for Computational Linguistics.