@inproceedings{aguirre-etal-2025-fine,
title = "Fine-Tuning Medium-Scale {LLM}s for Joint Intent Classification and Slot Filling: A Data-Efficient and Cost-Effective Solution for {SME}s",
author = "Aguirre, Maia and
M{\'e}ndez, Ariane and
del Pozo, Arantza and
Torres, Maria Ines and
Torralbo, Manuel",
editor = "Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Eugenio, Barbara Di and
Schockaert, Steven and
Darwish, Kareem and
Agarwal, Apoorv",
booktitle = "Proceedings of the 31st International Conference on Computational Linguistics: Industry Track",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.coling-industry.21/",
pages = "251--262",
abstract = "Dialogue Systems (DS) are increasingly in demand for automating tasks through natural language interactions. However, the core techniques for user comprehension in DS depend heavily on large amounts of labeled data, limiting their applicability in data-scarce environments common to many companies. This paper identifies best practices for data-efficient development and cost-effective deployment of DS in real-world application scenarios. We evaluate whether fine-tuning a medium-sized Large Language Model (LLM) for joint Intent Classification (IC) and Slot Filling (SF), with moderate hardware resource requirements still affordable by SMEs, can achieve competitive performance using less data compared to current state-of-the-art models. Experiments on the Spanish and English portions of the MASSIVE corpus demonstrate that the Llama-3-8B-Instruct model fine-tuned with only 10{\%} of the data outperforms the JointBERT architecture and GPT-4o in a zero-shot prompting setup in monolingual settings. In cross-lingual scenarios, Llama-3-8B-Instruct drastically outperforms multilingual JointBERT demonstrating a vastly superior performance when fine-tuned in a language and evaluated in the other."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="aguirre-etal-2025-fine">
<titleInfo>
<title>Fine-Tuning Medium-Scale LLMs for Joint Intent Classification and Slot Filling: A Data-Efficient and Cost-Effective Solution for SMEs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maia</namePart>
<namePart type="family">Aguirre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ariane</namePart>
<namePart type="family">Méndez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arantza</namePart>
<namePart type="family">del Pozo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="given">Ines</namePart>
<namePart type="family">Torres</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manuel</namePart>
<namePart type="family">Torralbo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 31st International Conference on Computational Linguistics: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Apidianaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hend</namePart>
<namePart type="family">Al-Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="given">Di</namePart>
<namePart type="family">Eugenio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Schockaert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kareem</namePart>
<namePart type="family">Darwish</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Apoorv</namePart>
<namePart type="family">Agarwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Dialogue Systems (DS) are increasingly in demand for automating tasks through natural language interactions. However, the core techniques for user comprehension in DS depend heavily on large amounts of labeled data, limiting their applicability in data-scarce environments common to many companies. This paper identifies best practices for data-efficient development and cost-effective deployment of DS in real-world application scenarios. We evaluate whether fine-tuning a medium-sized Large Language Model (LLM) for joint Intent Classification (IC) and Slot Filling (SF), with moderate hardware resource requirements still affordable for SMEs, can achieve competitive performance using less data than current state-of-the-art models. Experiments on the Spanish and English portions of the MASSIVE corpus demonstrate that the Llama-3-8B-Instruct model fine-tuned with only 10% of the data outperforms the JointBERT architecture and GPT-4o in a zero-shot prompting setup in monolingual settings. In cross-lingual scenarios, Llama-3-8B-Instruct drastically outperforms multilingual JointBERT, demonstrating vastly superior performance when fine-tuned in one language and evaluated in the other.</abstract>
<identifier type="citekey">aguirre-etal-2025-fine</identifier>
<location>
<url>https://aclanthology.org/2025.coling-industry.21/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>251</start>
<end>262</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Fine-Tuning Medium-Scale LLMs for Joint Intent Classification and Slot Filling: A Data-Efficient and Cost-Effective Solution for SMEs
%A Aguirre, Maia
%A Méndez, Ariane
%A del Pozo, Arantza
%A Torres, Maria Ines
%A Torralbo, Manuel
%Y Rambow, Owen
%Y Wanner, Leo
%Y Apidianaki, Marianna
%Y Al-Khalifa, Hend
%Y Eugenio, Barbara Di
%Y Schockaert, Steven
%Y Darwish, Kareem
%Y Agarwal, Apoorv
%S Proceedings of the 31st International Conference on Computational Linguistics: Industry Track
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F aguirre-etal-2025-fine
%X Dialogue Systems (DS) are increasingly in demand for automating tasks through natural language interactions. However, the core techniques for user comprehension in DS depend heavily on large amounts of labeled data, limiting their applicability in data-scarce environments common to many companies. This paper identifies best practices for data-efficient development and cost-effective deployment of DS in real-world application scenarios. We evaluate whether fine-tuning a medium-sized Large Language Model (LLM) for joint Intent Classification (IC) and Slot Filling (SF), with moderate hardware resource requirements still affordable for SMEs, can achieve competitive performance using less data than current state-of-the-art models. Experiments on the Spanish and English portions of the MASSIVE corpus demonstrate that the Llama-3-8B-Instruct model fine-tuned with only 10% of the data outperforms the JointBERT architecture and GPT-4o in a zero-shot prompting setup in monolingual settings. In cross-lingual scenarios, Llama-3-8B-Instruct drastically outperforms multilingual JointBERT, demonstrating vastly superior performance when fine-tuned in one language and evaluated in the other.
%U https://aclanthology.org/2025.coling-industry.21/
%P 251-262
[Fine-Tuning Medium-Scale LLMs for Joint Intent Classification and Slot Filling: A Data-Efficient and Cost-Effective Solution for SMEs](https://aclanthology.org/2025.coling-industry.21/) (Aguirre et al., COLING 2025)
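The record above describes fine-tuning an instruction-tuned LLM (Llama-3-8B-Instruct) for joint Intent Classification (IC) and Slot Filling (SF) on the MASSIVE corpus. As a rough illustration only, the following minimal Python sketch shows how a MASSIVE-style utterance might be serialized into an instruction/response pair for such joint fine-tuning; the prompt wording, JSON output format, and helper function are assumptions made here for illustration, not the prompt template or label format used in the paper.

# Hypothetical sketch: serialize a MASSIVE-style example into an
# instruction/response pair for joint intent classification (IC) and
# slot filling (SF). The prompt wording and JSON output format are
# assumptions, not the format used by Aguirre et al. (2025).
import json

def build_example(utterance: str, intent: str, slots: dict) -> dict:
    """Return one supervised fine-tuning record (prompt plus target)."""
    prompt = (
        "Identify the intent of the user utterance and extract its slots.\n"
        f"Utterance: {utterance}\n"
        "Answer as JSON with keys 'intent' and 'slots'."
    )
    target = json.dumps({"intent": intent, "slots": slots}, ensure_ascii=False)
    return {"prompt": prompt, "response": target}

if __name__ == "__main__":
    # Illustrative Spanish example in the style of MASSIVE annotations.
    record = build_example(
        utterance="pon una alarma mañana a las siete",
        intent="alarm_set",
        slots={"date": "mañana", "time": "a las siete"},
    )
    print(json.dumps(record, ensure_ascii=False, indent=2))

Pairs in this shape could feed any standard supervised fine-tuning recipe for a medium-scale LLM; the paper's actual template and training configuration are not reproduced here.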