% ACL Anthology BibTeX record for the CRAC 2025 workshop paper by Chiarcos.
% Braces in the title keep the framework name and the post-period "A"
% capitalised under styles that apply sentence casing to titles.
@inproceedings{chiarcos-2025-revisiting,
    title = "Revisiting the {Givenness} {Hierarchy}. {A} Corpus-Based Evaluation",
    author = "Chiarcos, Christian",
    editor = "Ogrodniczuk, Maciej  and
      Novak, Michal  and
      Poesio, Massimo  and
      Pradhan, Sameer  and
      Ng, Vincent",
    booktitle = "Proceedings of the Eighth Workshop on Computational Models of Reference, Anaphora and Coreference",
    month = nov,
    year = "2025",
    address = "Suzhou, China",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.crac-1.3/",
    pages = "24--41",
    abstract = "Gundel et al.{'}s Givenness Hierarchy remains one of the most influential frameworks of Information Status to this date, and has been employed in different technical contexts to account for context-sensitive and hearer-tailored language in human-machine interaction and natural language processing as well as as a topic of linguistic inquiry. At the same time, the data basis upon which this theory has been developed remains relatively thin. Although its applicability to a broad array of languages has been repeatedly confirmed, the empirical evidence presented for certain phenomena, and in particular, with respect to demonstrative determiners and demonstrative pronouns did not always reach conventional levels of statistical significance. In this paper, we provide an empirical, corpus-based re-assessment of two seminal papers for the Givenness Hierarchy, Gundel et al. (1990) and Gundel et al. (1993), where we aim to replicate their findings on the basis of corpora with coreference annotation for their original sample of languages, i.e., Arabic, Chinese, English, Japanese, Korean, Russian and Spanish. We describe the operationalization of Gundel et al.{'}s `cognitive statuses', their approximation by means of anaphoric relations, the preprocessing of diverse and heterogeneous corpora and evaluate Gundel et al.{'}s claims. Our contribution is three-fold: We evaluate the Givenness Hierarchy against quantitative data at a scale that allows to assess statistical significance, we discuss challenges and problems encountered in the process, in the preprocessing and in the interpretation of the diverse corpora, we provide two generalizations: a procedure for bootstrapping Givenness Hierarchies for other languages, and possible cross-linguistically applicable tendencies in the systems of referring expressions."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chiarcos-2025-revisiting">
<titleInfo>
<title>Revisiting the Givenness Hierarchy. A Corpus-Based Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christian</namePart>
<namePart type="family">Chiarcos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth Workshop on Computational Models of Reference, Anaphora and Coreference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maciej</namePart>
<namePart type="family">Ogrodniczuk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michal</namePart>
<namePart type="family">Novak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Massimo</namePart>
<namePart type="family">Poesio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sameer</namePart>
<namePart type="family">Pradhan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vincent</namePart>
<namePart type="family">Ng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Gundel et al.’s Givenness Hierarchy remains one of the most influential frameworks of Information Status to this date, and has been employed in different technical contexts to account for context-sensitive and hearer-tailored language in human-machine interaction and natural language processing as well as as a topic of linguistic inquiry. At the same time, the data basis upon which this theory has been developed remains relatively thin. Although its applicability to a broad array of languages has been repeatedly confirmed, the empirical evidence presented for certain phenomena, and in particular, with respect to demonstrative determiners and demonstrative pronouns did not always reach conventional levels of statistical significance. In this paper, we provide an empirical, corpus-based re-assessment of two seminal papers for the Givenness Hierarchy, Gundel et al. (1990) and Gundel et al. (1993), where we aim to replicate their findings on the basis of corpora with coreference annotation for their original sample of languages, i.e., Arabic, Chinese, English, Japanese, Korean, Russian and Spanish. We describe the operationalization of Gundel et al.’s ‘cognitive statuses’, their approximation by means of anaphoric relations, the preprocessing of diverse and heterogeneous corpora and evaluate Gundel et al.’s claims. Our contribution is three-fold: We evaluate the Givenness Hierarchy against quantitative data at a scale that allows to assess statistical significance, we discuss challenges and problems encountered in the process, in the preprocessing and in the interpretation of the diverse corpora, we provide two generalizations: a procedure for bootstrapping Givenness Hierarchies for other languages, and possible cross-linguistically applicable tendencies in the systems of referring expressions.</abstract>
<identifier type="citekey">chiarcos-2025-revisiting</identifier>
<location>
<url>https://aclanthology.org/2025.crac-1.3/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>24</start>
<end>41</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Revisiting the Givenness Hierarchy. A Corpus-Based Evaluation
%A Chiarcos, Christian
%Y Ogrodniczuk, Maciej
%Y Novak, Michal
%Y Poesio, Massimo
%Y Pradhan, Sameer
%Y Ng, Vincent
%S Proceedings of the Eighth Workshop on Computational Models of Reference, Anaphora and Coreference
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%F chiarcos-2025-revisiting
%X Gundel et al.’s Givenness Hierarchy remains one of the most influential frameworks of Information Status to this date, and has been employed in different technical contexts to account for context-sensitive and hearer-tailored language in human-machine interaction and natural language processing as well as as a topic of linguistic inquiry. At the same time, the data basis upon which this theory has been developed remains relatively thin. Although its applicability to a broad array of languages has been repeatedly confirmed, the empirical evidence presented for certain phenomena, and in particular, with respect to demonstrative determiners and demonstrative pronouns did not always reach conventional levels of statistical significance. In this paper, we provide an empirical, corpus-based re-assessment of two seminal papers for the Givenness Hierarchy, Gundel et al. (1990) and Gundel et al. (1993), where we aim to replicate their findings on the basis of corpora with coreference annotation for their original sample of languages, i.e., Arabic, Chinese, English, Japanese, Korean, Russian and Spanish. We describe the operationalization of Gundel et al.’s ‘cognitive statuses’, their approximation by means of anaphoric relations, the preprocessing of diverse and heterogeneous corpora and evaluate Gundel et al.’s claims. Our contribution is three-fold: We evaluate the Givenness Hierarchy against quantitative data at a scale that allows to assess statistical significance, we discuss challenges and problems encountered in the process, in the preprocessing and in the interpretation of the diverse corpora, we provide two generalizations: a procedure for bootstrapping Givenness Hierarchies for other languages, and possible cross-linguistically applicable tendencies in the systems of referring expressions.
%U https://aclanthology.org/2025.crac-1.3/
%P 24-41
Markdown (Informal)
[Revisiting the Givenness Hierarchy. A Corpus-Based Evaluation](https://aclanthology.org/2025.crac-1.3/) (Chiarcos, CRAC 2025)
ACL