% Conference-paper entry; every string field uses brace delimiters.
@inproceedings{pawlowski-walkowiak-2024-nlp,
  author    = {Paw{\l}owski, Adam and
               Walkowiak, Tomasz},
  title     = {{NLP} for Digital Humanities: Processing Chronological Text Corpora},
  editor    = {H{\"a}m{\"a}l{\"a}inen, Mika and
               {\"O}hman, Emily and
               Miyagawa, So and
               Alnajjar, Khalid and
               Bizzoni, Yuri},
  booktitle = {Proceedings of the 4th International Conference on Natural Language Processing for Digital Humanities},
  month     = nov,
  year      = {2024},
  address   = {Miami, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {105--112},
  url       = {https://aclanthology.org/2024.nlp4dh-1.10},
  abstract  = {The paper focuses on the integration of Natural Language Processing (NLP) techniques to analyze extensive chronological text corpora. This research underscores the synergy between humanistic inquiry and computational methods, especially in the processing and analysis of sequential textual data known as lexical series. A reference workflow for chronological corpus analysis is introduced, outlining the methodologies applicable to the ChronoPress corpus, a data set that encompasses 22 years of Polish press from 1945 to 1966. The study showcases the potential of this approach in uncovering cultural and historical patterns through the analysis of lexical series. The findings highlight both the challenges and opportunities present in leveraging lexical series analysis within Digital Humanities, emphasizing the necessity for advanced data filtering and anomaly detection algorithms to effectively manage the vast and intricate datasets characteristic of this field.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the citation key pawlowski-walkowiak-2024-nlp. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="pawlowski-walkowiak-2024-nlp">
    <titleInfo>
      <title>NLP for Digital Humanities: Processing Chronological Text Corpora</title>
    </titleInfo>
    <!-- Authors of the paper, in order. -->
    <name type="personal">
      <namePart type="given">Adam</namePart>
      <namePart type="family">Pawłowski</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tomasz</namePart>
      <namePart type="family">Walkowiak</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publication details. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 4th International Conference on Natural Language Processing for Digital Humanities</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Mika</namePart>
        <namePart type="family">Hämäläinen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Emily</namePart>
        <namePart type="family">Öhman</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">So</namePart>
        <namePart type="family">Miyagawa</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Khalid</namePart>
        <namePart type="family">Alnajjar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yuri</namePart>
        <namePart type="family">Bizzoni</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Miami, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The paper focuses on the integration of Natural Language Processing (NLP) techniques to analyze extensive chronological text corpora. This research underscores the synergy between humanistic inquiry and computational methods, especially in the processing and analysis of sequential textual data known as lexical series. A reference workflow for chronological corpus analysis is introduced, outlining the methodologies applicable to the ChronoPress corpus, a data set that encompasses 22 years of Polish press from 1945 to 1966. The study showcases the potential of this approach in uncovering cultural and historical patterns through the analysis of lexical series. The findings highlight both the challenges and opportunities present in leveraging lexical series analysis within Digital Humanities, emphasizing the necessity for advanced data filtering and anomaly detection algorithms to effectively manage the vast and intricate datasets characteristic of this field.</abstract>
    <identifier type="citekey">pawlowski-walkowiak-2024-nlp</identifier>
    <location>
      <url>https://aclanthology.org/2024.nlp4dh-1.10</url>
    </location>
    <!-- Issue date and page range for this paper. -->
    <part>
      <date>2024-11</date>
      <extent unit="page">
        <start>105</start>
        <end>112</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T NLP for Digital Humanities: Processing Chronological Text Corpora
%A Pawłowski, Adam
%A Walkowiak, Tomasz
%Y Hämäläinen, Mika
%Y Öhman, Emily
%Y Miyagawa, So
%Y Alnajjar, Khalid
%Y Bizzoni, Yuri
%S Proceedings of the 4th International Conference on Natural Language Processing for Digital Humanities
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, USA
%F pawlowski-walkowiak-2024-nlp
%X The paper focuses on the integration of Natural Language Processing (NLP) techniques to analyze extensive chronological text corpora. This research underscores the synergy between humanistic inquiry and computational methods, especially in the processing and analysis of sequential textual data known as lexical series. A reference workflow for chronological corpus analysis is introduced, outlining the methodologies applicable to the ChronoPress corpus, a data set that encompasses 22 years of Polish press from 1945 to 1966. The study showcases the potential of this approach in uncovering cultural and historical patterns through the analysis of lexical series. The findings highlight both the challenges and opportunities present in leveraging lexical series analysis within Digital Humanities, emphasizing the necessity for advanced data filtering and anomaly detection algorithms to effectively manage the vast and intricate datasets characteristic of this field.
%U https://aclanthology.org/2024.nlp4dh-1.10
%P 105-112
Markdown (Informal)
[NLP for Digital Humanities: Processing Chronological Text Corpora](https://aclanthology.org/2024.nlp4dh-1.10) (Pawłowski & Walkowiak, NLP4DH 2024)
ACL