% BibTeX record for ACL Anthology item 2025.jclib-1.4: journal article, volume 1, pages 61--83.
% Field values are kept verbatim from the export; only delimiters and layout are normalised.
@article{temnikova-etal-2025-automatic,
  title     = {Automatic Detection of the {Bulgarian} Evidential Renarrative},
  author    = {Temnikova, Irina and
               Margova, Ruslana and
               Minkov, Stefan and
               Stefanova, Tsvetelina and
               Grigorova, Nevena and
               Gargova, Silvia and
               Kovatchev, Venelin},
  editor    = {Koeva, Svetla},
  journal   = {Journal Computational Linguistics in Bulgaria},
  volume    = {1},
  month     = jul,
  year      = {2025},
  address   = {Sofia, Bulgaria},
  publisher = {Institute for Bulgarian Language, Department of Computational Linguistics, Bulgarian Academy of Sciences},
  url       = {https://aclanthology.org/2025.jclib-1.4/},
  doi       = {10.47810/JCLIB.1.2025.04},
  pages     = {61--83},
  abstract  = {Manual and automatic verification of the trustworthiness of information is an important task. Knowing whether the author of a statement was an eyewitness to the reported event(s) is a useful clue. In linguistics, such information is expressed through ``evidentiality''. Evidentials are especially important in Bulgarian, as Bulgarian journalists often use a specific type of evidential ({``}renarrative'') to report events that they did not directly observe, nor verify. Unfortunately, there are no automatic tools to detect Bulgarian renarrative. This article presents the first two automatic solutions for this task. Specifically - a fine-tuned BERT classifier (renarrative BERT detector, BGRenBERT), achieving 0.98 Accuracy on the test split, and a renarrative rulebased detector (BGRenRules), created with regular expressions, matching a parser{'}s output. Both solutions detect Bulgarian texts containing the most frequently encountered forms of renarrative. Additionally, we compare the results of the two detectors with the manual annotation of subsets of two Bulgarian fake text datasets. BGRenRules obtains substantially higher results than BGRenBERT. The error analysis shows that the errors from BGRenRules most frequently correspond to cases in which humans also have doubts. The training dataset (BgRenData), the annotated dataset subsets, and the two detectors are made publicly accessible on Zenodo, GitHub, and HuggingFace. We expect that these new resources will be of invaluable assistance to 1) Bulgarian-language researchers, 2) researchers of other languages with similar phenomena, especially those working on verifying information.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID equals the citekey identifier further down. -->
  <mods ID="temnikova-etal-2025-automatic">
    <titleInfo>
      <title>Automatic Detection of the Bulgarian Evidential Renarrative</title>
    </titleInfo>
    <!-- Seven personal names, each carrying the marcrelator role "author". -->
    <name type="personal">
      <namePart type="given">Irina</namePart>
      <namePart type="family">Temnikova</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ruslana</namePart>
      <namePart type="family">Margova</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Stefan</namePart>
      <namePart type="family">Minkov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tsvetelina</namePart>
      <namePart type="family">Stefanova</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nevena</namePart>
      <namePart type="family">Grigorova</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Silvia</namePart>
      <namePart type="family">Gargova</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Venelin</namePart>
      <namePart type="family">Kovatchev</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <!-- Issue date of the article (year-month). -->
    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <genre authority="bibutilsgt">journal article</genre>
    <!-- Host item: the journal, with its publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Journal Computational Linguistics in Bulgaria</title>
      </titleInfo>
      <originInfo>
        <issuance>continuing</issuance>
        <publisher>Institute for Bulgarian Language, Department of Computational Linguistics, Bulgarian Academy of Sciences</publisher>
        <place>
          <placeTerm type="text">Sofia, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">periodical</genre>
      <genre authority="bibutilsgt">academic journal</genre>
    </relatedItem>
    <abstract>Manual and automatic verification of the trustworthiness of information is an important task. Knowing whether the author of a statement was an eyewitness to the reported event(s) is a useful clue. In linguistics, such information is expressed through “evidentiality”. Evidentials are especially important in Bulgarian, as Bulgarian journalists often use a specific type of evidential (“renarrative”) to report events that they did not directly observe, nor verify. Unfortunately, there are no automatic tools to detect Bulgarian renarrative. This article presents the first two automatic solutions for this task. Specifically - a fine-tuned BERT classifier (renarrative BERT detector, BGRenBERT), achieving 0.98 Accuracy on the test split, and a renarrative rulebased detector (BGRenRules), created with regular expressions, matching a parser’s output. Both solutions detect Bulgarian texts containing the most frequently encountered forms of renarrative. Additionally, we compare the results of the two detectors with the manual annotation of subsets of two Bulgarian fake text datasets. BGRenRules obtains substantially higher results than BGRenBERT. The error analysis shows that the errors from BGRenRules most frequently correspond to cases in which humans also have doubts. The training dataset (BgRenData), the annotated dataset subsets, and the two detectors are made publicly accessible on Zenodo, GitHub, and HuggingFace. We expect that these new resources will be of invaluable assistance to 1) Bulgarian-language researchers, 2) researchers of other languages with similar phenomena, especially those working on verifying information.</abstract>
    <!-- Identifiers and landing-page URL. -->
    <identifier type="citekey">temnikova-etal-2025-automatic</identifier>
    <identifier type="doi">10.47810/JCLIB.1.2025.04</identifier>
    <location>
      <url>https://aclanthology.org/2025.jclib-1.4/</url>
    </location>
    <!-- Position within the host: date, volume number and page extent. -->
    <part>
      <date>2025-07</date>
      <detail type="volume">
        <number>1</number>
      </detail>
      <extent unit="page">
        <start>61</start>
        <end>83</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Journal Article
%T Automatic Detection of the Bulgarian Evidential Renarrative
%A Temnikova, Irina
%A Margova, Ruslana
%A Minkov, Stefan
%A Stefanova, Tsvetelina
%A Grigorova, Nevena
%A Gargova, Silvia
%A Kovatchev, Venelin
%J Journal Computational Linguistics in Bulgaria
%D 2025
%8 July
%V 1
%I Institute for Bulgarian Language, Department of Computational Linguistics, Bulgarian Academy of Sciences
%C Sofia, Bulgaria
%F temnikova-etal-2025-automatic
%X Manual and automatic verification of the trustworthiness of information is an important task. Knowing whether the author of a statement was an eyewitness to the reported event(s) is a useful clue. In linguistics, such information is expressed through “evidentiality”. Evidentials are especially important in Bulgarian, as Bulgarian journalists often use a specific type of evidential (“renarrative”) to report events that they did not directly observe, nor verify. Unfortunately, there are no automatic tools to detect Bulgarian renarrative. This article presents the first two automatic solutions for this task. Specifically - a fine-tuned BERT classifier (renarrative BERT detector, BGRenBERT), achieving 0.98 Accuracy on the test split, and a renarrative rulebased detector (BGRenRules), created with regular expressions, matching a parser’s output. Both solutions detect Bulgarian texts containing the most frequently encountered forms of renarrative. Additionally, we compare the results of the two detectors with the manual annotation of subsets of two Bulgarian fake text datasets. BGRenRules obtains substantially higher results than BGRenBERT. The error analysis shows that the errors from BGRenRules most frequently correspond to cases in which humans also have doubts. The training dataset (BgRenData), the annotated dataset subsets, and the two detectors are made publicly accessible on Zenodo, GitHub, and HuggingFace. We expect that these new resources will be of invaluable assistance to 1) Bulgarian-language researchers, 2) researchers of other languages with similar phenomena, especially those working on verifying information.
%R 10.47810/JCLIB.1.2025.04
%U https://aclanthology.org/2025.jclib-1.4/
%U https://doi.org/10.47810/JCLIB.1.2025.04
%P 61-83
Markdown (Informal)
[Automatic Detection of the Bulgarian Evidential Renarrative](https://aclanthology.org/2025.jclib-1.4/) (Temnikova et al., JCLIB 2025)
ACL