% Conference paper in the NoDaLiDa/Baltic-HLT 2025 proceedings; the url field points to its ACL Anthology page.
% Protected words in the title ({CEFR}, {Icelandic}) keep their capitalisation under sentence-casing styles.
@inproceedings{glisic-etal-2025-testing,
  title     = {Testing relevant linguistic features in automatic {CEFR} skill level classification for {Icelandic}},
  author    = {Gli{\v{s}}i{\'c}, Isidora and
               Richter, Caitlin Laura and
               Ingason, Anton Karl},
  editor    = {Johansson, Richard and
               Stymne, Sara},
  booktitle = {Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)},
  month     = mar,
  year      = {2025},
  address   = {Tallinn, Estonia},
  publisher = {University of Tartu Library},
  url       = {https://aclanthology.org/2025.nodalida-1.22/},
  pages     = {217--222},
  isbn      = {978-9908-53-109-0},
  abstract  = {This paper explores the use of various linguistic features to develop models for automatic classification of language proficiency on the CEFR scale for Icelandic, a low-resourced and morphologically complex language. We train two classifiers to assess skill level of learner texts. One is used as a baseline and takes in the original unaltered text written by a learner and uses predominantly surface features to assess the level. The other uses both surface and other morphological and lexical features, as well as context vectors from transformer (IceBERT). It takes in both the original and corrected versions of the text and takes into account errors/deviation of the original texts compared to the corrected versions. Both classifiers show promising results, with baseline models achieving between 62.2-67.1{\%} accuracy and dual-version between 75-80.3{\%}.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS v3 record for the conference paper with citekey glisic-etal-2025-testing. -->
  <mods ID="glisic-etal-2025-testing">
    <titleInfo>
      <title>Testing relevant linguistic features in automatic CEFR skill level classification for Icelandic</title>
    </titleInfo>
    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Isidora</namePart>
      <namePart type="family">Glišić</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Caitlin</namePart>
      <namePart type="given">Laura</namePart>
      <namePart type="family">Richter</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Anton</namePart>
      <namePart type="given">Karl</namePart>
      <namePart type="family">Ingason</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Richard</namePart>
        <namePart type="family">Johansson</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sara</namePart>
        <namePart type="family">Stymne</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>University of Tartu Library</publisher>
        <place>
          <placeTerm type="text">Tallinn, Estonia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">978-9908-53-109-0</identifier>
    </relatedItem>
    <abstract>This paper explores the use of various linguistic features to develop models for automatic classification of language proficiency on the CEFR scale for Icelandic, a low-resourced and morphologically complex language. We train two classifiers to assess skill level of learner texts. One is used as a baseline and takes in the original unaltered text written by a learner and uses predominantly surface features to assess the level. The other uses both surface and other morphological and lexical features, as well as context vectors from transformer (IceBERT). It takes in both the original and corrected versions of the text and takes into account errors/deviation of the original texts compared to the corrected versions. Both classifiers show promising results, with baseline models achieving between 62.2-67.1% accuracy and dual-version between 75-80.3%.</abstract>
    <identifier type="citekey">glisic-etal-2025-testing</identifier>
    <location>
      <url>https://aclanthology.org/2025.nodalida-1.22/</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2025-03</date>
      <extent unit="page">
        <start>217</start>
        <end>222</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Testing relevant linguistic features in automatic CEFR skill level classification for Icelandic
%A Glišić, Isidora
%A Richter, Caitlin Laura
%A Ingason, Anton Karl
%Y Johansson, Richard
%Y Stymne, Sara
%S Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)
%D 2025
%8 March
%I University of Tartu Library
%C Tallinn, Estonia
%@ 978-9908-53-109-0
%F glisic-etal-2025-testing
%X This paper explores the use of various linguistic features to develop models for automatic classification of language proficiency on the CEFR scale for Icelandic, a low-resourced and morphologically complex language. We train two classifiers to assess skill level of learner texts. One is used as a baseline and takes in the original unaltered text written by a learner and uses predominantly surface features to assess the level. The other uses both surface and other morphological and lexical features, as well as context vectors from transformer (IceBERT). It takes in both the original and corrected versions of the text and takes into account errors/deviation of the original texts compared to the corrected versions. Both classifiers show promising results, with baseline models achieving between 62.2-67.1% accuracy and dual-version between 75-80.3%.
%U https://aclanthology.org/2025.nodalida-1.22/
%P 217-222
Markdown (Informal)
[Testing relevant linguistic features in automatic CEFR skill level classification for Icelandic](https://aclanthology.org/2025.nodalida-1.22/) (Glišić et al., NoDaLiDa 2025)
ACL