@inproceedings{mosquera-2022-tackling,
title = "Tackling Data Drift with Adversarial Validation: An Application for {G}erman Text Complexity Estimation",
author = "Mosquera, Alejandro",
editor = {M{\"o}ller, Sebastian and
Mohtaj, Salar and
Naderi, Babak},
booktitle = "Proceedings of the GermEval 2022 Workshop on Text Complexity Assessment of German Text",
month = sep,
year = "2022",
address = "Potsdam, Germany",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.germeval-1.7",
pages = "39--44",
abstract = "This paper describes the winning approach in the first automated German text complexity assessment shared task as part of KONVENS 2022. To solve this difficult problem, the evaluated system relies on an ensemble of regression models that successfully combines both traditional feature engineering and pre-trained resources. Moreover, the use of adversarial validation is proposed as a method for countering the data drift identified during the development phase, thus helping to select relevant models and features and avoid leaderboard overfitting. The best submission reached 0.43 mapped RMSE on the test set during the final phase of the competition.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mosquera-2022-tackling">
<titleInfo>
<title>Tackling Data Drift with Adversarial Validation: An Application for German Text Complexity Estimation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alejandro</namePart>
<namePart type="family">Mosquera</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the GermEval 2022 Workshop on Text Complexity Assessment of German Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Möller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Salar</namePart>
<namePart type="family">Mohtaj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Babak</namePart>
<namePart type="family">Naderi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Potsdam, Germany</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes the winning approach in the first automated German text complexity assessment shared task as part of KONVENS 2022. To solve this difficult problem, the evaluated system relies on an ensemble of regression models that successfully combines both traditional feature engineering and pre-trained resources. Moreover, the use of adversarial validation is proposed as a method for countering the data drift identified during the development phase, thus helping to select relevant models and features and avoid leaderboard overfitting. The best submission reached 0.43 mapped RMSE on the test set during the final phase of the competition.</abstract>
<identifier type="citekey">mosquera-2022-tackling</identifier>
<location>
<url>https://aclanthology.org/2022.germeval-1.7</url>
</location>
<part>
<date>2022-09</date>
<extent unit="page">
<start>39</start>
<end>44</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Tackling Data Drift with Adversarial Validation: An Application for German Text Complexity Estimation
%A Mosquera, Alejandro
%Y Möller, Sebastian
%Y Mohtaj, Salar
%Y Naderi, Babak
%S Proceedings of the GermEval 2022 Workshop on Text Complexity Assessment of German Text
%D 2022
%8 September
%I Association for Computational Linguistics
%C Potsdam, Germany
%F mosquera-2022-tackling
%X This paper describes the winning approach in the first automated German text complexity assessment shared task as part of KONVENS 2022. To solve this difficult problem, the evaluated system relies on an ensemble of regression models that successfully combines both traditional feature engineering and pre-trained resources. Moreover, the use of adversarial validation is proposed as a method for countering the data drift identified during the development phase, thus helping to select relevant models and features and avoid leaderboard overfitting. The best submission reached 0.43 mapped RMSE on the test set during the final phase of the competition.
%U https://aclanthology.org/2022.germeval-1.7
%P 39-44
Markdown (Informal)
[Tackling Data Drift with Adversarial Validation: An Application for German Text Complexity Estimation](https://aclanthology.org/2022.germeval-1.7) (Mosquera, GermEval 2022)
ACL