@inproceedings{kostic-etal-2022-pseudo,
title = "Pseudo-Labels Are All You Need",
author = "Kosti{\'c}, Bogdan and
Lucka, Mathis and
Risch, Julian",
editor = {M{\"o}ller, Sebastian and
Mohtaj, Salar and
Naderi, Babak},
booktitle = "Proceedings of the GermEval 2022 Workshop on Text Complexity Assessment of German Text",
month = sep,
year = "2022",
address = "Potsdam, Germany",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.germeval-1.6",
pages = "33--38",
abstract = "Automatically estimating the complexity of texts for readers has a variety of applications, such as recommending texts with an appropriate complexity level to language learners or supporting the evaluation of text simplification approaches. In this paper, we present our submission to the Text Complexity DE Challenge 2022, a regression task where the goal is to predict the complexity of a German sentence for German learners at level B. Our approach relies on more than 220,000 pseudolabels created from the German Wikipedia and other corpora to train Transformer-based models, and refrains from any feature engineering or any additional, labeled data. We find that the pseudo-label-based approach gives impressive results yet requires little to no adjustment to the specific task and therefore could be easily adapted to other domains and tasks.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kostic-etal-2022-pseudo">
<titleInfo>
<title>Pseudo-Labels Are All You Need</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bogdan</namePart>
<namePart type="family">Kostić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mathis</namePart>
<namePart type="family">Lucka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julian</namePart>
<namePart type="family">Risch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the GermEval 2022 Workshop on Text Complexity Assessment of German Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Möller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Salar</namePart>
<namePart type="family">Mohtaj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Babak</namePart>
<namePart type="family">Naderi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Potsdam, Germany</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Automatically estimating the complexity of texts for readers has a variety of applications, such as recommending texts with an appropriate complexity level to language learners or supporting the evaluation of text simplification approaches. In this paper, we present our submission to the Text Complexity DE Challenge 2022, a regression task where the goal is to predict the complexity of a German sentence for German learners at level B. Our approach relies on more than 220,000 pseudolabels created from the German Wikipedia and other corpora to train Transformer-based models, and refrains from any feature engineering or any additional, labeled data. We find that the pseudo-label-based approach gives impressive results yet requires little to no adjustment to the specific task and therefore could be easily adapted to other domains and tasks.</abstract>
<identifier type="citekey">kostic-etal-2022-pseudo</identifier>
<location>
<url>https://aclanthology.org/2022.germeval-1.6</url>
</location>
<part>
<date>2022-09</date>
<extent unit="page">
<start>33</start>
<end>38</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Pseudo-Labels Are All You Need
%A Kostić, Bogdan
%A Lucka, Mathis
%A Risch, Julian
%Y Möller, Sebastian
%Y Mohtaj, Salar
%Y Naderi, Babak
%S Proceedings of the GermEval 2022 Workshop on Text Complexity Assessment of German Text
%D 2022
%8 September
%I Association for Computational Linguistics
%C Potsdam, Germany
%F kostic-etal-2022-pseudo
%X Automatically estimating the complexity of texts for readers has a variety of applications, such as recommending texts with an appropriate complexity level to language learners or supporting the evaluation of text simplification approaches. In this paper, we present our submission to the Text Complexity DE Challenge 2022, a regression task where the goal is to predict the complexity of a German sentence for German learners at level B. Our approach relies on more than 220,000 pseudolabels created from the German Wikipedia and other corpora to train Transformer-based models, and refrains from any feature engineering or any additional, labeled data. We find that the pseudo-label-based approach gives impressive results yet requires little to no adjustment to the specific task and therefore could be easily adapted to other domains and tasks.
%U https://aclanthology.org/2022.germeval-1.6
%P 33-38
Markdown (Informal)
[Pseudo-Labels Are All You Need](https://aclanthology.org/2022.germeval-1.6) (Kostić et al., GermEval 2022)
ACL
- Bogdan Kostić, Mathis Lucka, and Julian Risch. 2022. Pseudo-Labels Are All You Need. In Proceedings of the GermEval 2022 Workshop on Text Complexity Assessment of German Text, pages 33–38, Potsdam, Germany. Association for Computational Linguistics.