@inproceedings{kellert-tyagi-2025-languages,
title = "Where and How Do Languages Mix? A Study of {S}panish-{G}uaran{\'i} Code-Switching in {P}araguay",
author = "Kellert, Olga and
Tyagi, Nemika",
editor = "Winata, Genta Indra and
Kar, Sudipta and
Zhukova, Marina and
Solorio, Thamar and
Ai, Xi and
Hamed, Injy and
Ihsani, Mahardika Krisna Krisna and
Wijaya, Derry Tanti and
Kuwanto, Garry",
booktitle = "Proceedings of the 7th Workshop on Computational Approaches to Linguistic Code-Switching",
month = may,
year = "2025",
address = "Albuquerque, New Mexico, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.calcs-1.4/",
doi = "10.18653/v1/2025.calcs-1.4",
pages = "26--31",
ISBN = "979-8-89176-053-0",
abstract = "Code-switching, the alternating use of multiple languages within a single utterance, is a widespread linguistic phenomenon that poses unique challenges for both sociolinguistic analysis and Natural Language Processing (NLP). While prior research has explored code-switching from either a syntactic or geographic perspective, few studies have integrated both aspects, particularly for underexplored language pairs like Spanish-Guaran{\'i}. In this paper, we analyze Spanish-Guaran{\'i} code-switching using a dataset of geotagged tweets from Asunci{\'o}n, Paraguay, collected from 2017 to 2021. We employ a differential distribution method to map the geographic distribution of code-switching across urban zones and analyze its syntactic positioning within sentences. Our findings reveal distinct spatial patterns, with Guaran{\'i}-dominant tweets concentrated in the western and southwestern areas, while Spanish-only tweets are more prevalent in central and eastern regions. Syntactic analysis shows that code-switching occurs most frequently in the middle of sentences, often involving verbs, pronouns, and adjectives. These results provide new insights into the interaction between linguistic, social, and geographic factors in bilingual communication. Our study contributes to both sociolinguistic research and NLP applications, offering a framework for analyzing mixed-language data in digital communication."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kellert-tyagi-2025-languages">
<titleInfo>
<title>Where and How Do Languages Mix? A Study of Spanish-Guaraní Code-Switching in Paraguay</title>
</titleInfo>
<name type="personal">
<namePart type="given">Olga</namePart>
<namePart type="family">Kellert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nemika</namePart>
<namePart type="family">Tyagi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 7th Workshop on Computational Approaches to Linguistic Code-Switching</title>
</titleInfo>
<name type="personal">
<namePart type="given">Genta</namePart>
<namePart type="given">Indra</namePart>
<namePart type="family">Winata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sudipta</namePart>
<namePart type="family">Kar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marina</namePart>
<namePart type="family">Zhukova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thamar</namePart>
<namePart type="family">Solorio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xi</namePart>
<namePart type="family">Ai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Injy</namePart>
<namePart type="family">Hamed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mahardika</namePart>
<namePart type="given">Krisna</namePart>
<namePart type="given">Krisna</namePart>
<namePart type="family">Ihsani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Derry</namePart>
<namePart type="given">Tanti</namePart>
<namePart type="family">Wijaya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Garry</namePart>
<namePart type="family">Kuwanto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-053-0</identifier>
</relatedItem>
<abstract>Code-switching, the alternating use of multiple languages within a single utterance, is a widespread linguistic phenomenon that poses unique challenges for both sociolinguistic analysis and Natural Language Processing (NLP). While prior research has explored code-switching from either a syntactic or geographic perspective, few studies have integrated both aspects, particularly for underexplored language pairs like Spanish-Guaraní. In this paper, we analyze Spanish-Guaraní code-switching using a dataset of geotagged tweets from Asunción, Paraguay, collected from 2017 to 2021. We employ a differential distribution method to map the geographic distribution of code-switching across urban zones and analyze its syntactic positioning within sentences. Our findings reveal distinct spatial patterns, with Guaraní-dominant tweets concentrated in the western and southwestern areas, while Spanish-only tweets are more prevalent in central and eastern regions. Syntactic analysis shows that code-switching occurs most frequently in the middle of sentences, often involving verbs, pronouns, and adjectives. These results provide new insights into the interaction between linguistic, social, and geographic factors in bilingual communication. Our study contributes to both sociolinguistic research and NLP applications, offering a framework for analyzing mixed-language data in digital communication.</abstract>
<identifier type="citekey">kellert-tyagi-2025-languages</identifier>
<identifier type="doi">10.18653/v1/2025.calcs-1.4</identifier>
<location>
<url>https://aclanthology.org/2025.calcs-1.4/</url>
</location>
<part>
<date>2025-05</date>
<extent unit="page">
<start>26</start>
<end>31</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Where and How Do Languages Mix? A Study of Spanish-Guaraní Code-Switching in Paraguay
%A Kellert, Olga
%A Tyagi, Nemika
%Y Winata, Genta Indra
%Y Kar, Sudipta
%Y Zhukova, Marina
%Y Solorio, Thamar
%Y Ai, Xi
%Y Hamed, Injy
%Y Ihsani, Mahardika Krisna Krisna
%Y Wijaya, Derry Tanti
%Y Kuwanto, Garry
%S Proceedings of the 7th Workshop on Computational Approaches to Linguistic Code-Switching
%D 2025
%8 May
%I Association for Computational Linguistics
%C Albuquerque, New Mexico, USA
%@ 979-8-89176-053-0
%F kellert-tyagi-2025-languages
%X Code-switching, the alternating use of multiple languages within a single utterance, is a widespread linguistic phenomenon that poses unique challenges for both sociolinguistic analysis and Natural Language Processing (NLP). While prior research has explored code-switching from either a syntactic or geographic perspective, few studies have integrated both aspects, particularly for underexplored language pairs like Spanish-Guaraní. In this paper, we analyze Spanish-Guaraní code-switching using a dataset of geotagged tweets from Asunción, Paraguay, collected from 2017 to 2021. We employ a differential distribution method to map the geographic distribution of code-switching across urban zones and analyze its syntactic positioning within sentences. Our findings reveal distinct spatial patterns, with Guaraní-dominant tweets concentrated in the western and southwestern areas, while Spanish-only tweets are more prevalent in central and eastern regions. Syntactic analysis shows that code-switching occurs most frequently in the middle of sentences, often involving verbs, pronouns, and adjectives. These results provide new insights into the interaction between linguistic, social, and geographic factors in bilingual communication. Our study contributes to both sociolinguistic research and NLP applications, offering a framework for analyzing mixed-language data in digital communication.
%R 10.18653/v1/2025.calcs-1.4
%U https://aclanthology.org/2025.calcs-1.4/
%U https://doi.org/10.18653/v1/2025.calcs-1.4
%P 26-31
Markdown (Informal)
[Where and How Do Languages Mix? A Study of Spanish-Guaraní Code-Switching in Paraguay](https://aclanthology.org/2025.calcs-1.4/) (Kellert & Tyagi, CALCS 2025)
ACL