@inproceedings{ondrejova-suppa-2024-llms,
title = "Can {LLM}s Handle Low-Resource Dialects? A Case Study on Translation and Common Sense Reasoning in {\v{S}}ari{\v{s}}",
author = "Ondrejov{\'a}, Vikt{\'o}ria and
{\v{S}}uppa, Marek",
editor = {Scherrer, Yves and
Jauhiainen, Tommi and
Ljube{\v{s}}i{\'c}, Nikola and
Zampieri, Marcos and
Nakov, Preslav and
Tiedemann, J{\"o}rg},
booktitle = "Proceedings of the Eleventh Workshop on NLP for Similar Languages, Varieties, and Dialects (VarDial 2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.vardial-1.11",
doi = "10.18653/v1/2024.vardial-1.11",
pages = "130--139",
abstract = "While Large Language Models (LLMs) have demonstrated considerable potential in advancing natural language processing in dialect-specific contexts, their effectiveness in these settings has yet to be thoroughly assessed. This study introduces a case study on {\v{S}}ari{\v{s}}, a dialect of Slovak, which is itself a language with fewer resources, focusing on Machine Translation and Common Sense Reasoning tasks. We employ LLMs in a zero-shot configuration and for data augmentation to refine Slovak-{\v{S}}ari{\v{s}} and {\v{S}}ari{\v{s}}-Slovak translation models. The accuracy of these models is then manually verified by native speakers. Additionally, we introduce {\v{S}}ari{\v{s}}COPA, a new dataset for causal common sense reasoning, which, alongside SlovakCOPA, serves to evaluate LLM{'}s performance in a zero-shot framework. Our findings highlight LLM{'}s capabilities in processing low-resource dialects and suggest a viable approach for initiating dialect-specific translation models in such contexts.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ondrejova-suppa-2024-llms">
<titleInfo>
<title>Can LLMs Handle Low-Resource Dialects? A Case Study on Translation and Common Sense Reasoning in Šariš</title>
</titleInfo>
<name type="personal">
<namePart type="given">Viktória</namePart>
<namePart type="family">Ondrejová</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marek</namePart>
<namePart type="family">Šuppa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eleventh Workshop on NLP for Similar Languages, Varieties, and Dialects (VarDial 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yves</namePart>
<namePart type="family">Scherrer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tommi</namePart>
<namePart type="family">Jauhiainen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikola</namePart>
<namePart type="family">Ljubešić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>While Large Language Models (LLMs) have demonstrated considerable potential in advancing natural language processing in dialect-specific contexts, their effectiveness in these settings has yet to be thoroughly assessed. This study introduces a case study on Šariš, a dialect of Slovak, which is itself a language with fewer resources, focusing on Machine Translation and Common Sense Reasoning tasks. We employ LLMs in a zero-shot configuration and for data augmentation to refine Slovak-Šariš and Šariš-Slovak translation models. The accuracy of these models is then manually verified by native speakers. Additionally, we introduce ŠarišCOPA, a new dataset for causal common sense reasoning, which, alongside SlovakCOPA, serves to evaluate LLM’s performance in a zero-shot framework. Our findings highlight LLM’s capabilities in processing low-resource dialects and suggest a viable approach for initiating dialect-specific translation models in such contexts.</abstract>
<identifier type="citekey">ondrejova-suppa-2024-llms</identifier>
<identifier type="doi">10.18653/v1/2024.vardial-1.11</identifier>
<location>
<url>https://aclanthology.org/2024.vardial-1.11</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>130</start>
<end>139</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Can LLMs Handle Low-Resource Dialects? A Case Study on Translation and Common Sense Reasoning in Šariš
%A Ondrejová, Viktória
%A Šuppa, Marek
%Y Scherrer, Yves
%Y Jauhiainen, Tommi
%Y Ljubešić, Nikola
%Y Zampieri, Marcos
%Y Nakov, Preslav
%Y Tiedemann, Jörg
%S Proceedings of the Eleventh Workshop on NLP for Similar Languages, Varieties, and Dialects (VarDial 2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F ondrejova-suppa-2024-llms
%X While Large Language Models (LLMs) have demonstrated considerable potential in advancing natural language processing in dialect-specific contexts, their effectiveness in these settings has yet to be thoroughly assessed. This study introduces a case study on Šariš, a dialect of Slovak, which is itself a language with fewer resources, focusing on Machine Translation and Common Sense Reasoning tasks. We employ LLMs in a zero-shot configuration and for data augmentation to refine Slovak-Šariš and Šariš-Slovak translation models. The accuracy of these models is then manually verified by native speakers. Additionally, we introduce ŠarišCOPA, a new dataset for causal common sense reasoning, which, alongside SlovakCOPA, serves to evaluate LLM’s performance in a zero-shot framework. Our findings highlight LLM’s capabilities in processing low-resource dialects and suggest a viable approach for initiating dialect-specific translation models in such contexts.
%R 10.18653/v1/2024.vardial-1.11
%U https://aclanthology.org/2024.vardial-1.11
%U https://doi.org/10.18653/v1/2024.vardial-1.11
%P 130-139
Markdown (Informal)
[Can LLMs Handle Low-Resource Dialects? A Case Study on Translation and Common Sense Reasoning in Šariš](https://aclanthology.org/2024.vardial-1.11) (Ondrejová & Šuppa, VarDial-WS 2024)
ACL