@inproceedings{grasso-etal-2024-nytac,
title = "{NYTAC}-{CC}: A Climate Change Subcorpus of {N}ew {Y}ork {T}imes Articles",
author = "Grasso, Francesca and
Patz, Ronny and
Stede, Manfred",
editor = "Dell'Orletta, Felice and
Lenci, Alessandro and
Montemagni, Simonetta and
Sprugnoli, Rachele",
booktitle = "Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)",
month = dec,
year = "2024",
address = "Pisa, Italy",
publisher = "CEUR Workshop Proceedings",
url = "https://aclanthology.org/2024.clicit-1.48/",
pages = "403--409",
ISBN = "979-12-210-7060-6",
abstract = "Over the past decade, the analysis of discourses on climate change (CC) has gained increased interest within the social sciences and the NLP community. Textual resources are crucial for understanding how narratives about this phenomenon are crafted and delivered. However, there still is a scarcity of datasets that cover CC in news media in a representative way. This paper presents a CC-specific subcorpus extracted from the 1.8 million New York Times Annotated Corpus, marking the first CC analysis on this data. The subcorpus was created by combining different methods for text selection to ensure representativeness and reliability, which is further validated using ClimateBERT. To provide initial insights into the CC subcorpus, we discuss the results of a topic modeling experiment (LDA). These show the diversity of contexts in which CC is discussed in news media over time, which is relevant for various downstream tasks."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="grasso-etal-2024-nytac">
<titleInfo>
<title>NYTAC-CC: A Climate Change Subcorpus of New York Times Articles</title>
</titleInfo>
<name type="personal">
<namePart type="given">Francesca</namePart>
<namePart type="family">Grasso</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ronny</namePart>
<namePart type="family">Patz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manfred</namePart>
<namePart type="family">Stede</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Felice</namePart>
<namePart type="family">Dell’Orletta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simonetta</namePart>
<namePart type="family">Montemagni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rachele</namePart>
<namePart type="family">Sprugnoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>CEUR Workshop Proceedings</publisher>
<place>
<placeTerm type="text">Pisa, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-12-210-7060-6</identifier>
</relatedItem>
<abstract>Over the past decade, the analysis of discourses on climate change (CC) has gained increased interest within the social sciences and the NLP community. Textual resources are crucial for understanding how narratives about this phenomenon are crafted and delivered. However, there still is a scarcity of datasets that cover CC in news media in a representative way. This paper presents a CC-specific subcorpus extracted from the 1.8 million New York Times Annotated Corpus, marking the first CC analysis on this data. The subcorpus was created by combining different methods for text selection to ensure representativeness and reliability, which is further validated using ClimateBERT. To provide initial insights into the CC subcorpus, we discuss the results of a topic modeling experiment (LDA). These show the diversity of contexts in which CC is discussed in news media over time, which is relevant for various downstream tasks.</abstract>
<identifier type="citekey">grasso-etal-2024-nytac</identifier>
<location>
<url>https://aclanthology.org/2024.clicit-1.48/</url>
</location>
<part>
<date>2024-12</date>
<extent unit="page">
<start>403</start>
<end>409</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T NYTAC-CC: A Climate Change Subcorpus of New York Times Articles
%A Grasso, Francesca
%A Patz, Ronny
%A Stede, Manfred
%Y Dell’Orletta, Felice
%Y Lenci, Alessandro
%Y Montemagni, Simonetta
%Y Sprugnoli, Rachele
%S Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)
%D 2024
%8 December
%I CEUR Workshop Proceedings
%C Pisa, Italy
%@ 979-12-210-7060-6
%F grasso-etal-2024-nytac
%X Over the past decade, the analysis of discourses on climate change (CC) has gained increased interest within the social sciences and the NLP community. Textual resources are crucial for understanding how narratives about this phenomenon are crafted and delivered. However, there still is a scarcity of datasets that cover CC in news media in a representative way. This paper presents a CC-specific subcorpus extracted from the 1.8 million New York Times Annotated Corpus, marking the first CC analysis on this data. The subcorpus was created by combining different methods for text selection to ensure representativeness and reliability, which is further validated using ClimateBERT. To provide initial insights into the CC subcorpus, we discuss the results of a topic modeling experiment (LDA). These show the diversity of contexts in which CC is discussed in news media over time, which is relevant for various downstream tasks.
%U https://aclanthology.org/2024.clicit-1.48/
%P 403-409
Markdown (Informal)
[NYTAC-CC: A Climate Change Subcorpus of New York Times Articles](https://aclanthology.org/2024.clicit-1.48/) (Grasso et al., CLiC-it 2024)
ACL