@inproceedings{midtgaard-etal-2025-ltg,
title = "{LTG} at {V}ar{D}ial 2025 {N}or{SID}: More and Better Training Data for Slot and Intent Detection",
author = "Midtgaard, Marthe and
M{\ae}hlum, Petter and
Scherrer, Yves",
editor = "Scherrer, Yves and
Jauhiainen, Tommi and
Ljube{\v{s}}i{\'c}, Nikola and
Nakov, Preslav and
Tiedemann, Jorg and
Zampieri, Marcos",
booktitle = "Proceedings of the 12th Workshop on NLP for Similar Languages, Varieties and Dialects",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.vardial-1.15/",
pages = "200--208",
abstract = "This paper describes the LTG submission to the VarDial 2025 shared task, where we participate in the Norwegian slot and intent detection subtasks. The shared task focuses on Norwegian dialects, which present challenges due to their low-resource nature and variation. We test a variety of neural models and training data configurations, with the focus on improving and extending the available Norwegian training data. This includes automatically re-aligning slot spans in Norwegian Bokm{\r{a}}l, as well as re-translating the original English training data into both Bokm{\r{a}}l and Nynorsk. {\%} to address dialectal diversity. We also re-annotate an external Norwegian dataset to augment the training data. Our best models achieve first place in both subtasks, achieving an span F1 score of 0.893 for slot filling and an accuracy of 0.980 for intent detection. Our results indicate that while translation quality is less critical, improving the slot labels has a notable impact on slot performance. Moreover, adding more standard Norwegian data improves performance, but incorporating even small amounts of dialectal data leads to greater gains."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="midtgaard-etal-2025-ltg">
<titleInfo>
<title>LTG at VarDial 2025 NorSID: More and Better Training Data for Slot and Intent Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marthe</namePart>
<namePart type="family">Midtgaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Petter</namePart>
<namePart type="family">Mæhlum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yves</namePart>
<namePart type="family">Scherrer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th Workshop on NLP for Similar Languages, Varieties and Dialects</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yves</namePart>
<namePart type="family">Scherrer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tommi</namePart>
<namePart type="family">Jauhiainen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikola</namePart>
<namePart type="family">Ljubešić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jorg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes the LTG submission to the VarDial 2025 shared task, where we participate in the Norwegian slot and intent detection subtasks. The shared task focuses on Norwegian dialects, which present challenges due to their low-resource nature and variation. We test a variety of neural models and training data configurations, with the focus on improving and extending the available Norwegian training data. This includes automatically re-aligning slot spans in Norwegian Bokmål, as well as re-translating the original English training data into both Bokmål and Nynorsk. % to address dialectal diversity. We also re-annotate an external Norwegian dataset to augment the training data. Our best models achieve first place in both subtasks, achieving an span F1 score of 0.893 for slot filling and an accuracy of 0.980 for intent detection. Our results indicate that while translation quality is less critical, improving the slot labels has a notable impact on slot performance. Moreover, adding more standard Norwegian data improves performance, but incorporating even small amounts of dialectal data leads to greater gains.</abstract>
<identifier type="citekey">midtgaard-etal-2025-ltg</identifier>
<location>
<url>https://aclanthology.org/2025.vardial-1.15/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>200</start>
<end>208</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T LTG at VarDial 2025 NorSID: More and Better Training Data for Slot and Intent Detection
%A Midtgaard, Marthe
%A Mæhlum, Petter
%A Scherrer, Yves
%Y Scherrer, Yves
%Y Jauhiainen, Tommi
%Y Ljubešić, Nikola
%Y Nakov, Preslav
%Y Tiedemann, Jorg
%Y Zampieri, Marcos
%S Proceedings of the 12th Workshop on NLP for Similar Languages, Varieties and Dialects
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F midtgaard-etal-2025-ltg
%X This paper describes the LTG submission to the VarDial 2025 shared task, where we participate in the Norwegian slot and intent detection subtasks. The shared task focuses on Norwegian dialects, which present challenges due to their low-resource nature and variation. We test a variety of neural models and training data configurations, with the focus on improving and extending the available Norwegian training data. This includes automatically re-aligning slot spans in Norwegian Bokmål, as well as re-translating the original English training data into both Bokmål and Nynorsk. % to address dialectal diversity. We also re-annotate an external Norwegian dataset to augment the training data. Our best models achieve first place in both subtasks, achieving an span F1 score of 0.893 for slot filling and an accuracy of 0.980 for intent detection. Our results indicate that while translation quality is less critical, improving the slot labels has a notable impact on slot performance. Moreover, adding more standard Norwegian data improves performance, but incorporating even small amounts of dialectal data leads to greater gains.
%U https://aclanthology.org/2025.vardial-1.15/
%P 200-208
Markdown (Informal)
[LTG at VarDial 2025 NorSID: More and Better Training Data for Slot and Intent Detection](https://aclanthology.org/2025.vardial-1.15/) (Midtgaard et al., VarDial 2025)
ACL