@inproceedings{luong-etal-2025-vinumfcr,
title = "{V}i{N}um{FCR}: A Novel {V}ietnamese Benchmark for Numerical Reasoning Fact Checking on Social Media News",
author = "Luong, Nhi Ngoc Phuong and
Le, Anh Thi Lan and
Huynh, Tin Van and
Nguyen, Kiet Van and
Nguyen, Ngan",
editor = "Flek, Lucie and
Narayan, Shashi and
Phương, L{\^e} Hồng and
Pei, Jiahuan",
booktitle = "Proceedings of the 18th International Natural Language Generation Conference",
month = oct,
year = "2025",
address = "Hanoi, Vietnam",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.inlg-main.9/",
pages = "134--147",
abstract = "In the digital era, the internet provides rapid and convenient access to vast amounts of information. However, much of this information remains unverified, particularly with the increasing prevalence of falsified numerical data, leading to public confusion and negative societal impacts. To address this issue, we developed ViNumFCR, the first dataset dedicated to fact-checking numerical information in Vietnamese, comprising over 10,000 samples collected and constructed from online newspapers across 12 different topics. We assessed the performance of various fact-checking models, including Pretrained Language Models and Large Language Models, alongside retrieval techniques for gathering supporting evidence. Experimental results demonstrate that the XLM-R{\_}Large model achieved the highest accuracy of 90.05{\%} on the fact-checking task, while the combined SBERT + BM25 model attained a precision of over 97{\%} on the evidence retrieval task. Additionally, we conducted an in-depth analysis of the linguistic features of the dataset to understand the factors influencing model performance. The ViNumFCR dataset is publicly available to support further research."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="luong-etal-2025-vinumfcr">
<titleInfo>
<title>ViNumFCR: A Novel Vietnamese Benchmark for Numerical Reasoning Fact Checking on Social Media News</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nhi</namePart>
<namePart type="given">Ngoc</namePart>
<namePart type="given">Phuong</namePart>
<namePart type="family">Luong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anh</namePart>
<namePart type="given">Thi</namePart>
<namePart type="given">Lan</namePart>
<namePart type="family">Le</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tin</namePart>
<namePart type="given">Van</namePart>
<namePart type="family">Huynh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kiet</namePart>
<namePart type="given">Van</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ngan</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th International Natural Language Generation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lucie</namePart>
<namePart type="family">Flek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shashi</namePart>
<namePart type="family">Narayan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lê</namePart>
<namePart type="given">Hồng</namePart>
<namePart type="family">Phương</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiahuan</namePart>
<namePart type="family">Pei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hanoi, Vietnam</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In the digital era, the internet provides rapid and convenient access to vast amounts of information. However, much of this information remains unverified, particularly with the increasing prevalence of falsified numerical data, leading to public confusion and negative societal impacts. To address this issue, we developed ViNumFCR, the first dataset dedicated to fact-checking numerical information in Vietnamese, comprising over 10,000 samples collected and constructed from online newspapers across 12 different topics. We assessed the performance of various fact-checking models, including Pretrained Language Models and Large Language Models, alongside retrieval techniques for gathering supporting evidence. Experimental results demonstrate that the XLM-R_Large model achieved the highest accuracy of 90.05% on the fact-checking task, while the combined SBERT + BM25 model attained a precision of over 97% on the evidence retrieval task. Additionally, we conducted an in-depth analysis of the linguistic features of the dataset to understand the factors influencing model performance. The ViNumFCR dataset is publicly available to support further research.</abstract>
<identifier type="citekey">luong-etal-2025-vinumfcr</identifier>
<location>
<url>https://aclanthology.org/2025.inlg-main.9/</url>
</location>
<part>
<date>2025-10</date>
<extent unit="page">
<start>134</start>
<end>147</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ViNumFCR: A Novel Vietnamese Benchmark for Numerical Reasoning Fact Checking on Social Media News
%A Luong, Nhi Ngoc Phuong
%A Le, Anh Thi Lan
%A Huynh, Tin Van
%A Nguyen, Kiet Van
%A Nguyen, Ngan
%Y Flek, Lucie
%Y Narayan, Shashi
%Y Phương, Lê Hồng
%Y Pei, Jiahuan
%S Proceedings of the 18th International Natural Language Generation Conference
%D 2025
%8 October
%I Association for Computational Linguistics
%C Hanoi, Vietnam
%F luong-etal-2025-vinumfcr
%X In the digital era, the internet provides rapid and convenient access to vast amounts of information. However, much of this information remains unverified, particularly with the increasing prevalence of falsified numerical data, leading to public confusion and negative societal impacts. To address this issue, we developed ViNumFCR, the first dataset dedicated to fact-checking numerical information in Vietnamese, comprising over 10,000 samples collected and constructed from online newspapers across 12 different topics. We assessed the performance of various fact-checking models, including Pretrained Language Models and Large Language Models, alongside retrieval techniques for gathering supporting evidence. Experimental results demonstrate that the XLM-R_Large model achieved the highest accuracy of 90.05% on the fact-checking task, while the combined SBERT + BM25 model attained a precision of over 97% on the evidence retrieval task. Additionally, we conducted an in-depth analysis of the linguistic features of the dataset to understand the factors influencing model performance. The ViNumFCR dataset is publicly available to support further research.
%U https://aclanthology.org/2025.inlg-main.9/
%P 134-147
Markdown (Informal)
[ViNumFCR: A Novel Vietnamese Benchmark for Numerical Reasoning Fact Checking on Social Media News](https://aclanthology.org/2025.inlg-main.9/) (Luong et al., INLG 2025)
ACL