@inproceedings{andersson-etal-2025-applying,
title = "Applying and Optimising a Multi-Scale Probit Model for Cross-Source Text Complexity Classification and Ranking in {Swedish}",
author = {Andersson, Elsa and
Falkenjack, Johan and
J{\"o}nsson, Arne},
editor = "Johansson, Richard and
Stymne, Sara",
booktitle = "Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)",
month = mar,
year = "2025",
address = "Tallinn, Estonia",
publisher = "University of Tartu Library",
url = "https://aclanthology.org/2025.nodalida-1.3/",
pages = "17--27",
ISBN = "978-9908-53-109-0",
abstract = "We present results from using Probit models to classify and rank texts of varying complexity from multiple sources. We use multiple linguistic sources including Swedish easy-to-read books and investigate data augmentation and feature regularisation as optimisation methods for text complexity assessment. Multi-Scale and Single Scale Probit models are implemented using different ratios of training data, and then compared. Overall, the findings suggest that the Multi-Scale Probit model is an effective method for classifying text complexity and ranking new texts and could be used to improve the performance on small datasets as well as normalize datasets labelled using different scales."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="andersson-etal-2025-applying">
<titleInfo>
<title>Applying and Optimising a Multi-Scale Probit Model for Cross-Source Text Complexity Classification and Ranking in Swedish</title>
</titleInfo>
<name type="personal">
<namePart type="given">Elsa</namePart>
<namePart type="family">Andersson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Johan</namePart>
<namePart type="family">Falkenjack</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arne</namePart>
<namePart type="family">Jönsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="family">Johansson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Stymne</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>University of Tartu Library</publisher>
<place>
<placeTerm type="text">Tallinn, Estonia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">978-9908-53-109-0</identifier>
</relatedItem>
<abstract>We present results from using Probit models to classify and rank texts of varying complexity from multiple sources. We use multiple linguistic sources including Swedish easy-to-read books and investigate data augmentation and feature regularisation as optimisation methods for text complexity assessment. Multi-Scale and Single Scale Probit models are implemented using different ratios of training data, and then compared. Overall, the findings suggest that the Multi-Scale Probit model is an effective method for classifying text complexity and ranking new texts and could be used to improve the performance on small datasets as well as normalize datasets labelled using different scales.</abstract>
<identifier type="citekey">andersson-etal-2025-applying</identifier>
<location>
<url>https://aclanthology.org/2025.nodalida-1.3/</url>
</location>
<part>
<date>2025-03</date>
<extent unit="page">
<start>17</start>
<end>27</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Applying and Optimising a Multi-Scale Probit Model for Cross-Source Text Complexity Classification and Ranking in Swedish
%A Andersson, Elsa
%A Falkenjack, Johan
%A Jönsson, Arne
%Y Johansson, Richard
%Y Stymne, Sara
%S Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)
%D 2025
%8 March
%I University of Tartu Library
%C Tallinn, Estonia
%@ 978-9908-53-109-0
%F andersson-etal-2025-applying
%X We present results from using Probit models to classify and rank texts of varying complexity from multiple sources. We use multiple linguistic sources including Swedish easy-to-read books and investigate data augmentation and feature regularisation as optimisation methods for text complexity assessment. Multi-Scale and Single Scale Probit models are implemented using different ratios of training data, and then compared. Overall, the findings suggest that the Multi-Scale Probit model is an effective method for classifying text complexity and ranking new texts and could be used to improve the performance on small datasets as well as normalize datasets labelled using different scales.
%U https://aclanthology.org/2025.nodalida-1.3/
%P 17-27
Markdown (Informal)
[Applying and Optimising a Multi-Scale Probit Model for Cross-Source Text Complexity Classification and Ranking in Swedish](https://aclanthology.org/2025.nodalida-1.3/) (Andersson et al., NoDaLiDa 2025)
ACL