@inproceedings{feldkamp-etal-2025-fact,
title = "Fact from Fiction: Finding Serialized Novels in Newspapers",
author = "Feldkamp, Pascale and
Lassche, Alie and
Baunvig, Katrine Fr{\o}kj{\ae}r and
Nielbo, Kristoffer and
Bizzoni, Yuri",
editor = "Zhao, Jin and
Wang, Mingyang and
Liu, Zhu",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-srw.45/",
doi = "10.18653/v1/2025.acl-srw.45",
pages = "695--707",
ISBN = "979-8-89176-254-1",
abstract = "Digitized literary corpora of the 19th century favor canonical and novelistic forms, sidelining a broader and more diverse literary production. Serialized fiction {--} widely read but embedded in newspapers {--} remains especially underexplored, particularly in low-resource languages like Danish. This paper addresses this gap by developing methods to identify fiction in digitized Danish newspapers (1818{--}1848). We (1) introduce a manually annotated dataset of 1,394 articles and (2) evaluate classification pipelines using both selected linguistic features and embeddings, achieving F1-scores of up to 0.91. Finally, we (3) analyze feuilleton fiction via interpretable features to test its drift in discourse from neighboring nonfiction. Our results support the construction of alternative literary corpora and contribute to ongoing work on modeling the fiction{--}nonfiction boundary by operationalizing discourse-level distinctions at scale."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="feldkamp-etal-2025-fact">
<titleInfo>
<title>Fact from Fiction: Finding Serialized Novels in Newspapers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pascale</namePart>
<namePart type="family">Feldkamp</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alie</namePart>
<namePart type="family">Lassche</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katrine</namePart>
<namePart type="given">Frøkjær</namePart>
<namePart type="family">Baunvig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kristoffer</namePart>
<namePart type="family">Nielbo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuri</namePart>
<namePart type="family">Bizzoni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jin</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mingyang</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhu</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-254-1</identifier>
</relatedItem>
<abstract>Digitized literary corpora of the 19th century favor canonical and novelistic forms, sidelining a broader and more diverse literary production. Serialized fiction – widely read but embedded in newspapers – remains especially underexplored, particularly in low-resource languages like Danish. This paper addresses this gap by developing methods to identify fiction in digitized Danish newspapers (1818–1848). We (1) introduce a manually annotated dataset of 1,394 articles and (2) evaluate classification pipelines using both selected linguistic features and embeddings, achieving F1-scores of up to 0.91. Finally, we (3) analyze feuilleton fiction via interpretable features to test its drift in discourse from neighboring nonfiction. Our results support the construction of alternative literary corpora and contribute to ongoing work on modeling the fiction–nonfiction boundary by operationalizing discourse-level distinctions at scale.</abstract>
<identifier type="citekey">feldkamp-etal-2025-fact</identifier>
<identifier type="doi">10.18653/v1/2025.acl-srw.45</identifier>
<location>
<url>https://aclanthology.org/2025.acl-srw.45/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>695</start>
<end>707</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Fact from Fiction: Finding Serialized Novels in Newspapers
%A Feldkamp, Pascale
%A Lassche, Alie
%A Baunvig, Katrine Frøkjær
%A Nielbo, Kristoffer
%A Bizzoni, Yuri
%Y Zhao, Jin
%Y Wang, Mingyang
%Y Liu, Zhu
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-254-1
%F feldkamp-etal-2025-fact
%X Digitized literary corpora of the 19th century favor canonical and novelistic forms, sidelining a broader and more diverse literary production. Serialized fiction – widely read but embedded in newspapers – remains especially underexplored, particularly in low-resource languages like Danish. This paper addresses this gap by developing methods to identify fiction in digitized Danish newspapers (1818–1848). We (1) introduce a manually annotated dataset of 1,394 articles and (2) evaluate classification pipelines using both selected linguistic features and embeddings, achieving F1-scores of up to 0.91. Finally, we (3) analyze feuilleton fiction via interpretable features to test its drift in discourse from neighboring nonfiction. Our results support the construction of alternative literary corpora and contribute to ongoing work on modeling the fiction–nonfiction boundary by operationalizing discourse-level distinctions at scale.
%R 10.18653/v1/2025.acl-srw.45
%U https://aclanthology.org/2025.acl-srw.45/
%U https://doi.org/10.18653/v1/2025.acl-srw.45
%P 695-707
Markdown (Informal)
[Fact from Fiction: Finding Serialized Novels in Newspapers](https://aclanthology.org/2025.acl-srw.45/) (Feldkamp et al., ACL 2025)
ACL
- Pascale Feldkamp, Alie Lassche, Katrine Frøkjær Baunvig, Kristoffer Nielbo, and Yuri Bizzoni. 2025. Fact from Fiction: Finding Serialized Novels in Newspapers. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop), pages 695–707, Vienna, Austria. Association for Computational Linguistics.