% Conference-paper entry for "Generating and Analyzing Disfluency in a
% Code-Mixed Setting". Field values are brace-delimited; the acronym in
% booktitle is braced so case-changing styles leave it intact.
% NOTE(review): address is "Varna, Bulgaria" while publisher already names
% Shoumen, Bulgaria -- presumably address records the conference venue rather
% than the publisher's city; confirm before normalising either field.
@inproceedings{paul-etal-2025-generating,
  author    = {Paul, Aryan and
               Mondal, Tapabrata and
               Das, Dipankar and
               Bandyopadhyay, Sivaji},
  title     = {Generating and Analyzing Disfluency in a Code-Mixed Setting},
  editor    = {Angelova, Galia and
               Kunilovskaya, Maria and
               Escribe, Marie and
               Mitkov, Ruslan},
  booktitle = {Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative {AI} Era},
  month     = sep,
  year      = {2025},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd., Shoumen, Bulgaria},
  pages     = {915--924},
  url       = {https://aclanthology.org/2025.ranlp-1.105/},
  abstract  = {This work explores the intersection of code-mixing and disfluency in bilingual speech and text, with a focus on understanding how large language models (LLMs) handle code-mixed disfluent utterances. One of the primary objectives is to explore LLMs' ability to generate code-mixed disfluent sentences and to address the lack of high-quality code-mixed disfluent corpora, particularly for Indic languages. We aim to compare the performance of LLM-based approaches with traditional disfluency detection methods and to develop novel metrics for quantitatively assessing disfluency phenomena. Additionally, we investigate the relationship between code-mixing and disfluency, exploring how factors such as switching frequency and direction influence the occurrence of disfluencies. By analyzing these intriguing dynamics, we seek to gain a deeper understanding of the mutual influence between code-mixing and disfluency in multilingual speech.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier below. -->
  <mods ID="paul-etal-2025-generating">
    <titleInfo>
      <title>Generating and Analyzing Disfluency in a Code-Mixed Setting</title>
    </titleInfo>

    <!-- Authors of the paper -->
    <name type="personal">
      <namePart type="given">Aryan</namePart>
      <namePart type="family">Paul</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tapabrata</namePart>
      <namePart type="family">Mondal</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Dipankar</namePart>
      <namePart type="family">Das</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sivaji</namePart>
      <namePart type="family">Bandyopadhyay</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2025-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors and publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Galia</namePart>
        <namePart type="family">Angelova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Kunilovskaya</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marie</namePart>
        <namePart type="family">Escribe</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <abstract>This work explores the intersection of code-mixing and disfluency in bilingual speech and text, with a focus on understanding how large language models (LLMs) handle code-mixed disfluent utterances. One of the primary objectives is to explore LLMs’ ability to generate code-mixed disfluent sentences and to address the lack of high-quality code-mixed disfluent corpora, particularly for Indic languages. We aim to compare the performance of LLM-based approaches with traditional disfluency detection methods and to develop novel metrics for quantitatively assessing disfluency phenomena. Additionally, we investigate the relationship between code-mixing and disfluency, exploring how factors such as switching frequency and direction influence the occurrence of disfluencies. By analyzing these intriguing dynamics, we seek to gain a deeper understanding of the mutual influence between code-mixing and disfluency in multilingual speech.</abstract>

    <!-- Identifiers and location of the paper -->
    <identifier type="citekey">paul-etal-2025-generating</identifier>
    <location>
      <url>https://aclanthology.org/2025.ranlp-1.105/</url>
    </location>

    <!-- Issue date and page extent -->
    <part>
      <date>2025-09</date>
      <extent unit="page">
        <start>915</start>
        <end>924</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Generating and Analyzing Disfluency in a Code-Mixed Setting
%A Paul, Aryan
%A Mondal, Tapabrata
%A Das, Dipankar
%A Bandyopadhyay, Sivaji
%Y Angelova, Galia
%Y Kunilovskaya, Maria
%Y Escribe, Marie
%Y Mitkov, Ruslan
%S Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F paul-etal-2025-generating
%X This work explores the intersection of code-mixing and disfluency in bilingual speech and text, with a focus on understanding how large language models (LLMs) handle code-mixed disfluent utterances. One of the primary objectives is to explore LLMs’ ability to generate code-mixed disfluent sentences and to address the lack of high-quality code-mixed disfluent corpora, particularly for Indic languages. We aim to compare the performance of LLM-based approaches with traditional disfluency detection methods and to develop novel metrics for quantitatively assessing disfluency phenomena. Additionally, we investigate the relationship between code-mixing and disfluency, exploring how factors such as switching frequency and direction influence the occurrence of disfluencies. By analyzing these intriguing dynamics, we seek to gain a deeper understanding of the mutual influence between code-mixing and disfluency in multilingual speech.
%U https://aclanthology.org/2025.ranlp-1.105/
%P 915-924
Markdown (Informal)
[Generating and Analyzing Disfluency in a Code-Mixed Setting](https://aclanthology.org/2025.ranlp-1.105/) (Paul et al., RANLP 2025)
ACL
- Aryan Paul, Tapabrata Mondal, Dipankar Das, and Sivaji Bandyopadhyay. 2025. Generating and Analyzing Disfluency in a Code-Mixed Setting. In Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era, pages 915–924, Varna, Bulgaria. INCOMA Ltd., Shoumen, Bulgaria.