@inproceedings{weiss-meurers-2018-modeling,
title = "Modeling the Readability of {G}erman Targeting Adults and Children: An empirically broad analysis and its cross-corpus validation",
author = "Wei{\ss}, Zarah and
Meurers, Detmar",
editor = "Bender, Emily M. and
Derczynski, Leon and
Isabelle, Pierre",
booktitle = "Proceedings of the 27th International Conference on Computational Linguistics",
month = aug,
year = "2018",
address = "Santa Fe, New Mexico, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/C18-1026",
pages = "303--317",
abstract = "We analyze two novel data sets of German educational media texts targeting adults and children. The analysis is based on 400 automatically extracted measures of linguistic complexity from a wide range of linguistic domains. We show that both data sets exhibit broad linguistic adaptation to the target audience, which generalizes across both data sets. Our most successful binary classification model for German readability robustly shows high accuracy between 89.4{\%}{--}98.9{\%} for both data sets. To our knowledge, this comprehensive German readability model is the first for which robust cross-corpus performance has been shown. The research also contributes resources for German readability assessment that are externally validated as successful for different target audiences: we compiled a new corpus of German news broadcast subtitles, the Tagesschau/Logo corpus, and crawled a GEO/GEOlino corpus substantially enlarging the data compiled by Hancke et al. 2012.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="weiss-meurers-2018-modeling">
<titleInfo>
<title>Modeling the Readability of German Targeting Adults and Children: An empirically broad analysis and its cross-corpus validation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zarah</namePart>
<namePart type="family">Weiß</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Detmar</namePart>
<namePart type="family">Meurers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 27th International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Emily</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Bender</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leon</namePart>
<namePart type="family">Derczynski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pierre</namePart>
<namePart type="family">Isabelle</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Santa Fe, New Mexico, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We analyze two novel data sets of German educational media texts targeting adults and children. The analysis is based on 400 automatically extracted measures of linguistic complexity from a wide range of linguistic domains. We show that both data sets exhibit broad linguistic adaptation to the target audience, which generalizes across both data sets. Our most successful binary classification model for German readability robustly shows high accuracy between 89.4%–98.9% for both data sets. To our knowledge, this comprehensive German readability model is the first for which robust cross-corpus performance has been shown. The research also contributes resources for German readability assessment that are externally validated as successful for different target audiences: we compiled a new corpus of German news broadcast subtitles, the Tagesschau/Logo corpus, and crawled a GEO/GEOlino corpus substantially enlarging the data compiled by Hancke et al. 2012.</abstract>
<identifier type="citekey">weiss-meurers-2018-modeling</identifier>
<location>
<url>https://aclanthology.org/C18-1026</url>
</location>
<part>
<date>2018-08</date>
<extent unit="page">
<start>303</start>
<end>317</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Modeling the Readability of German Targeting Adults and Children: An empirically broad analysis and its cross-corpus validation
%A Weiß, Zarah
%A Meurers, Detmar
%Y Bender, Emily M.
%Y Derczynski, Leon
%Y Isabelle, Pierre
%S Proceedings of the 27th International Conference on Computational Linguistics
%D 2018
%8 August
%I Association for Computational Linguistics
%C Santa Fe, New Mexico, USA
%F weiss-meurers-2018-modeling
%X We analyze two novel data sets of German educational media texts targeting adults and children. The analysis is based on 400 automatically extracted measures of linguistic complexity from a wide range of linguistic domains. We show that both data sets exhibit broad linguistic adaptation to the target audience, which generalizes across both data sets. Our most successful binary classification model for German readability robustly shows high accuracy between 89.4%–98.9% for both data sets. To our knowledge, this comprehensive German readability model is the first for which robust cross-corpus performance has been shown. The research also contributes resources for German readability assessment that are externally validated as successful for different target audiences: we compiled a new corpus of German news broadcast subtitles, the Tagesschau/Logo corpus, and crawled a GEO/GEOlino corpus substantially enlarging the data compiled by Hancke et al. 2012.
%U https://aclanthology.org/C18-1026
%P 303-317
Markdown (Informal)
[Modeling the Readability of German Targeting Adults and Children: An empirically broad analysis and its cross-corpus validation](https://aclanthology.org/C18-1026) (Weiß & Meurers, COLING 2018)
ACL