% ParsFEVER paper, published in the proceedings of *SEM 2021 (pages 99--104).
% Case-sensitive words in the title and booktitle are brace-protected as whole
% words so that sentence-casing bibliography styles leave them intact.
@inproceedings{zarharan-etal-2021-parsfever,
  title     = {{ParsFEVER}: a Dataset for {Farsi} Fact Extraction and Verification},
  author    = {Zarharan, Majid and
               Ghaderan, Mahsa and
               Pourdabiri, Amin and
               Sayedi, Zahra and
               Minaei-Bidgoli, Behrouz and
               Eetemadi, Sauleh and
               Pilehvar, Mohammad Taher},
  editor    = {Ku, Lun-Wei and
               Nastase, Vivi and
               Vuli{\'c}, Ivan},
  booktitle = {Proceedings of {*SEM} 2021: The Tenth Joint Conference on Lexical and Computational Semantics},
  month     = aug,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.starsem-1.9},
  doi       = {10.18653/v1/2021.starsem-1.9},
  pages     = {99--104},
  abstract  = {Training and evaluation of automatic fact extraction and verification techniques require large amounts of annotated data which might not be available for low-resource languages. This paper presents ParsFEVER: the first publicly available Farsi dataset for fact extraction and verification. We adopt the construction procedure of the standard English dataset for the task, i.e., FEVER, and improve it for the case of low-resource languages. Specifically, claims are extracted from sentences that are carefully selected to be more informative. The dataset comprises nearly 23K manually-annotated claims. Over 65{\%} of the claims in ParsFEVER are many-hop (require evidence from multiple sources), making the dataset a challenging benchmark (only 13{\%} of the claims in FEVER are many-hop). Also, despite having a smaller training set (around one-ninth of that in Fever), a model trained on ParsFEVER attains similar downstream performance, indicating the quality of the dataset. We release the dataset and the annotation guidelines at \url{https://github.com/Zarharan/ParsFEVER}.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper with citekey zarharan-etal-2021-parsfever. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="zarharan-etal-2021-parsfever">
    <!-- Title of the paper itself. -->
    <titleInfo>
      <title>ParsFEVER: a Dataset for Farsi Fact Extraction and Verification</title>
    </titleInfo>
    <!-- Authors, in the order they are listed. -->
    <name type="personal">
      <namePart type="given">Majid</namePart>
      <namePart type="family">Zarharan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mahsa</namePart>
      <namePart type="family">Ghaderan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Amin</namePart>
      <namePart type="family">Pourdabiri</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zahra</namePart>
      <namePart type="family">Sayedi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Behrouz</namePart>
      <namePart type="family">Minaei-Bidgoli</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sauleh</namePart>
      <namePart type="family">Eetemadi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mohammad</namePart>
      <namePart type="given">Taher</namePart>
      <namePart type="family">Pilehvar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of *SEM 2021: The Tenth Joint Conference on Lexical and Computational Semantics</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Lun-Wei</namePart>
        <namePart type="family">Ku</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Vivi</namePart>
        <namePart type="family">Nastase</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ivan</namePart>
        <namePart type="family">Vulić</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Training and evaluation of automatic fact extraction and verification techniques require large amounts of annotated data which might not be available for low-resource languages. This paper presents ParsFEVER: the first publicly available Farsi dataset for fact extraction and verification. We adopt the construction procedure of the standard English dataset for the task, i.e., FEVER, and improve it for the case of low-resource languages. Specifically, claims are extracted from sentences that are carefully selected to be more informative. The dataset comprises nearly 23K manually-annotated claims. Over 65% of the claims in ParsFEVER are many-hop (require evidence from multiple sources), making the dataset a challenging benchmark (only 13% of the claims in FEVER are many-hop). Also, despite having a smaller training set (around one-ninth of that in Fever), a model trained on ParsFEVER attains similar downstream performance, indicating the quality of the dataset. We release the dataset and the annotation guidelines at https://github.com/Zarharan/ParsFEVER.</abstract>
    <!-- Identifiers and canonical location of the record. -->
    <identifier type="citekey">zarharan-etal-2021-parsfever</identifier>
    <identifier type="doi">10.18653/v1/2021.starsem-1.9</identifier>
    <location>
      <url>https://aclanthology.org/2021.starsem-1.9</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2021-08</date>
      <extent unit="page">
        <start>99</start>
        <end>104</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T ParsFEVER: a Dataset for Farsi Fact Extraction and Verification
%A Zarharan, Majid
%A Ghaderan, Mahsa
%A Pourdabiri, Amin
%A Sayedi, Zahra
%A Minaei-Bidgoli, Behrouz
%A Eetemadi, Sauleh
%A Pilehvar, Mohammad Taher
%Y Ku, Lun-Wei
%Y Nastase, Vivi
%Y Vulić, Ivan
%S Proceedings of *SEM 2021: The Tenth Joint Conference on Lexical and Computational Semantics
%D 2021
%8 August
%I Association for Computational Linguistics
%C Online
%F zarharan-etal-2021-parsfever
%X Training and evaluation of automatic fact extraction and verification techniques require large amounts of annotated data which might not be available for low-resource languages. This paper presents ParsFEVER: the first publicly available Farsi dataset for fact extraction and verification. We adopt the construction procedure of the standard English dataset for the task, i.e., FEVER, and improve it for the case of low-resource languages. Specifically, claims are extracted from sentences that are carefully selected to be more informative. The dataset comprises nearly 23K manually-annotated claims. Over 65% of the claims in ParsFEVER are many-hop (require evidence from multiple sources), making the dataset a challenging benchmark (only 13% of the claims in FEVER are many-hop). Also, despite having a smaller training set (around one-ninth of that in Fever), a model trained on ParsFEVER attains similar downstream performance, indicating the quality of the dataset. We release the dataset and the annotation guidelines at https://github.com/Zarharan/ParsFEVER.
%R 10.18653/v1/2021.starsem-1.9
%U https://aclanthology.org/2021.starsem-1.9
%U https://doi.org/10.18653/v1/2021.starsem-1.9
%P 99-104
Markdown (Informal)
[ParsFEVER: a Dataset for Farsi Fact Extraction and Verification](https://aclanthology.org/2021.starsem-1.9) (Zarharan et al., \*SEM 2021)
ACL
- Majid Zarharan, Mahsa Ghaderan, Amin Pourdabiri, Zahra Sayedi, Behrouz Minaei-Bidgoli, Sauleh Eetemadi, and Mohammad Taher Pilehvar. 2021. ParsFEVER: a Dataset for Farsi Fact Extraction and Verification. In Proceedings of *SEM 2021: The Tenth Joint Conference on Lexical and Computational Semantics, pages 99–104, Online. Association for Computational Linguistics.