@inproceedings{urchs-etal-2025-taz2024full,
title = "taz2024full: Analysing {G}erman Newspapers for Gender Bias and Discrimination across Decades",
author = "Urchs, Stefanie and
Thurner, Veronika and
A{\ss}enmacher, Matthias and
Heumann, Christian and
Thiemichen, Stephanie",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.555/",
doi = "10.18653/v1/2025.findings-acl.555",
pages = "10661--10671",
ISBN = "979-8-89176-256-5",
abstract = "Open-access corpora are essential for advancing natural language processing (NLP) and computational social science (CSS). However, large-scale resources for German remain limited, restricting research on linguistic trends and societal issues such as gender bias. We present taz2024full, the largest publicly available corpus of German newspaper articles to date, comprising over 1.8 million texts from taz, spanning 1980 to 2024. As a demonstration of the corpus{'}s utility for bias and discrimination research, we analyse gender representation across four decades of reporting. We find a consistent overrepresentation of men, but also a gradual shift toward more balanced coverage in recent years. Using a scalable, structured analysis pipeline, we provide a foundation for studying actor mentions, sentiment, and linguistic framing in German journalistic texts. The corpus supports a wide range of applications, from diachronic language analysis to critical media studies, and is freely available to foster inclusive and reproducible research in German-language NLP."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="urchs-etal-2025-taz2024full">
<titleInfo>
<title>taz2024full: Analysing German Newspapers for Gender Bias and Discrimination across Decades</title>
</titleInfo>
<name type="personal">
<namePart type="given">Stefanie</namePart>
<namePart type="family">Urchs</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronika</namePart>
<namePart type="family">Thurner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthias</namePart>
<namePart type="family">Aßenmacher</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christian</namePart>
<namePart type="family">Heumann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stephanie</namePart>
<namePart type="family">Thiemichen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-256-5</identifier>
</relatedItem>
<abstract>Open-access corpora are essential for advancing natural language processing (NLP) and computational social science (CSS). However, large-scale resources for German remain limited, restricting research on linguistic trends and societal issues such as gender bias. We present taz2024full, the largest publicly available corpus of German newspaper articles to date, comprising over 1.8 million texts from taz, spanning 1980 to 2024. As a demonstration of the corpus’s utility for bias and discrimination research, we analyse gender representation across four decades of reporting. We find a consistent overrepresentation of men, but also a gradual shift toward more balanced coverage in recent years. Using a scalable, structured analysis pipeline, we provide a foundation for studying actor mentions, sentiment, and linguistic framing in German journalistic texts. The corpus supports a wide range of applications, from diachronic language analysis to critical media studies, and is freely available to foster inclusive and reproducible research in German-language NLP.</abstract>
<identifier type="citekey">urchs-etal-2025-taz2024full</identifier>
<identifier type="doi">10.18653/v1/2025.findings-acl.555</identifier>
<location>
<url>https://aclanthology.org/2025.findings-acl.555/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>10661</start>
<end>10671</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T taz2024full: Analysing German Newspapers for Gender Bias and Discrimination across Decades
%A Urchs, Stefanie
%A Thurner, Veronika
%A Aßenmacher, Matthias
%A Heumann, Christian
%A Thiemichen, Stephanie
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F urchs-etal-2025-taz2024full
%X Open-access corpora are essential for advancing natural language processing (NLP) and computational social science (CSS). However, large-scale resources for German remain limited, restricting research on linguistic trends and societal issues such as gender bias. We present taz2024full, the largest publicly available corpus of German newspaper articles to date, comprising over 1.8 million texts from taz, spanning 1980 to 2024. As a demonstration of the corpus’s utility for bias and discrimination research, we analyse gender representation across four decades of reporting. We find a consistent overrepresentation of men, but also a gradual shift toward more balanced coverage in recent years. Using a scalable, structured analysis pipeline, we provide a foundation for studying actor mentions, sentiment, and linguistic framing in German journalistic texts. The corpus supports a wide range of applications, from diachronic language analysis to critical media studies, and is freely available to foster inclusive and reproducible research in German-language NLP.
%R 10.18653/v1/2025.findings-acl.555
%U https://aclanthology.org/2025.findings-acl.555/
%U https://doi.org/10.18653/v1/2025.findings-acl.555
%P 10661-10671
Markdown (Informal)
[taz2024full: Analysing German Newspapers for Gender Bias and Discrimination across Decades](https://aclanthology.org/2025.findings-acl.555/) (Urchs et al., Findings 2025)
ACL