@inproceedings{kamruzzaman-etal-2023-banmani,
title = "{B}an{MANI}: A Dataset to Identify Manipulated Social Media News in {B}angla",
author = "Kamruzzaman, Mahammed and
Shovon, Md. Minul Islam and
Kim, Gene",
editor = "Haddad, Amal Haddad and
Terryn, Ayla Rigouts and
Mitkov, Ruslan and
Rapp, Reinhard and
Zweigenbaum, Pierre and
Sharoff, Serge",
booktitle = "Proceedings of the Workshop on Computational Terminology in NLP and Translation Studies (ConTeNTS) Incorporating the 16th Workshop on Building and Using Comparable Corpora (BUCC)",
month = sep,
year = "2023",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2023.contents-1.7",
pages = "51--58",
abstract = "Initial work has been done to address fake news detection and misrepresentation of news in the Bengali language. However, no work in Bengali yet addresses the identification of specific claims in social media news that falsely manipulate a related news article. At this point, this problem has been tackled in English and a few other languages, but not in the Bengali language. In this paper, we curate a dataset of social media content labeled with information manipulation relative to reference articles, called BanMANI. The dataset collection method we describe works around the limitations of the available NLP tools in Bangla. We expect these techniques will carry over to building similar datasets in other low-resource languages. BanMANI forms the basis both for evaluating the capabilities of existing NLP systems and for training or fine-tuning new models specifically on this task. In our analysis, we find that this task challenges current LLMs both under zero-shot and fine-tuned set- things",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kamruzzaman-etal-2023-banmani">
<titleInfo>
<title>BanMANI: A Dataset to Identify Manipulated Social Media News in Bangla</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mahammed</namePart>
<namePart type="family">Kamruzzaman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Md.</namePart>
<namePart type="given">Minul</namePart>
<namePart type="given">Islam</namePart>
<namePart type="family">Shovon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gene</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Computational Terminology in NLP and Translation Studies (ConTeNTS) Incorporating the 16th Workshop on Building and Using Comparable Corpora (BUCC)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Amal</namePart>
<namePart type="given">Haddad</namePart>
<namePart type="family">Haddad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ayla</namePart>
<namePart type="given">Rigouts</namePart>
<namePart type="family">Terryn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reinhard</namePart>
<namePart type="family">Rapp</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pierre</namePart>
<namePart type="family">Zweigenbaum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Serge</namePart>
<namePart type="family">Sharoff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Initial work has been done to address fake news detection and misrepresentation of news in the Bengali language. However, no work in Bengali yet addresses the identification of specific claims in social media news that falsely manipulate a related news article. At this point, this problem has been tackled in English and a few other languages, but not in the Bengali language. In this paper, we curate a dataset of social media content labeled with information manipulation relative to reference articles, called BanMANI. The dataset collection method we describe works around the limitations of the available NLP tools in Bangla. We expect these techniques will carry over to building similar datasets in other low-resource languages. BanMANI forms the basis both for evaluating the capabilities of existing NLP systems and for training or fine-tuning new models specifically on this task. In our analysis, we find that this task challenges current LLMs both under zero-shot and fine-tuned set- things</abstract>
<identifier type="citekey">kamruzzaman-etal-2023-banmani</identifier>
<location>
<url>https://aclanthology.org/2023.contents-1.7</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>51</start>
<end>58</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BanMANI: A Dataset to Identify Manipulated Social Media News in Bangla
%A Kamruzzaman, Mahammed
%A Shovon, Md. Minul Islam
%A Kim, Gene
%Y Haddad, Amal Haddad
%Y Terryn, Ayla Rigouts
%Y Mitkov, Ruslan
%Y Rapp, Reinhard
%Y Zweigenbaum, Pierre
%Y Sharoff, Serge
%S Proceedings of the Workshop on Computational Terminology in NLP and Translation Studies (ConTeNTS) Incorporating the 16th Workshop on Building and Using Comparable Corpora (BUCC)
%D 2023
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F kamruzzaman-etal-2023-banmani
%X Initial work has been done to address fake news detection and misrepresentation of news in the Bengali language. However, no work in Bengali yet addresses the identification of specific claims in social media news that falsely manipulate a related news article. At this point, this problem has been tackled in English and a few other languages, but not in the Bengali language. In this paper, we curate a dataset of social media content labeled with information manipulation relative to reference articles, called BanMANI. The dataset collection method we describe works around the limitations of the available NLP tools in Bangla. We expect these techniques will carry over to building similar datasets in other low-resource languages. BanMANI forms the basis both for evaluating the capabilities of existing NLP systems and for training or fine-tuning new models specifically on this task. In our analysis, we find that this task challenges current LLMs both under zero-shot and fine-tuned set- things
%U https://aclanthology.org/2023.contents-1.7
%P 51-58
Markdown (Informal)
[BanMANI: A Dataset to Identify Manipulated Social Media News in Bangla](https://aclanthology.org/2023.contents-1.7) (Kamruzzaman et al., ConTeNTS-WS 2023)
ACL
- Mahammed Kamruzzaman, Md. Minul Islam Shovon, and Gene Kim. 2023. BanMANI: A Dataset to Identify Manipulated Social Media News in Bangla. In Proceedings of the Workshop on Computational Terminology in NLP and Translation Studies (ConTeNTS) Incorporating the 16th Workshop on Building and Using Comparable Corpora (BUCC), pages 51–58, Varna, Bulgaria. INCOMA Ltd., Shoumen, Bulgaria.