@inproceedings{wadhwa-etal-2025-northeastern,
title = "Northeastern Uni at Multilingual Counterspeech Generation: Enhancing Counter Speech Generation with {LLM} Alignment through Direct Preference Optimization",
author = "Wadhwa, Sahil and
Xu, Chengtian and
Chen, Haoming and
Mahalingam, Aakash and
Kar, Akankshya and
Chaudhary, Divya",
editor = "Bonaldi, Helena and
Vallecillo-Rodr{\'i}guez, Mar{\'i}a Estrella and
Zubiaga, Irune and
Montejo-R{\'a}ez, Arturo and
Soroa, Aitor and
Mart{\'i}n-Valdivia, Mar{\'i}a Teresa and
Guerini, Marco and
Agerri, Rodrigo",
booktitle = "Proceedings of the First Workshop on Multilingual Counterspeech Generation",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.mcg-1.3/",
pages = "19--28",
abstract = "The automatic generation of counter-speech (CS) is a critical strategy for addressing hate speech by providing constructive and informed responses. However, existing methods often fail to generate high-quality, impactful, and scalable CS, particularly across diverse lin- guistic contexts. In this paper, we propose a novel methodology to enhance CS generation by aligning Large Language Models (LLMs) using Supervised Fine-Tuning (SFT) and Di- rect Preference Optimization (DPO). Our ap- proach leverages DPO to align LLM outputs with human preferences, ensuring contextu- ally appropriate and linguistically adaptable responses. Additionally, we incorporate knowl- edge grounding to enhance the factual accuracy and relevance of generated CS. Experimental results demonstrate that DPO-aligned models significantly outperform SFT baselines on CS benchmarks while scaling effectively to mul- tiple languages. These findings highlight the potential of preference-based alignment tech- niques to advance CS generation across var- ied linguistic settings. The model supervision and alignment is done in English and the same model is used for reporting metrics across other languages like Basque, Italian, and Spanish."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wadhwa-etal-2025-northeastern">
  <titleInfo>
    <title>Northeastern Uni at Multilingual Counterspeech Generation: Enhancing Counter Speech Generation with LLM Alignment through Direct Preference Optimization</title>
  </titleInfo>
  <name type="personal">
    <namePart type="given">Sahil</namePart>
    <namePart type="family">Wadhwa</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Chengtian</namePart>
    <namePart type="family">Xu</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Haoming</namePart>
    <namePart type="family">Chen</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Aakash</namePart>
    <namePart type="family">Mahalingam</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Akankshya</namePart>
    <namePart type="family">Kar</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Divya</namePart>
    <namePart type="family">Chaudhary</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <originInfo>
    <dateIssued>2025-01</dateIssued>
  </originInfo>
  <typeOfResource>text</typeOfResource>
  <relatedItem type="host">
    <titleInfo>
      <title>Proceedings of the First Workshop on Multilingual Counterspeech Generation</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Helena</namePart>
      <namePart type="family">Bonaldi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">María</namePart>
      <namePart type="given">Estrella</namePart>
      <namePart type="family">Vallecillo-Rodríguez</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Irune</namePart>
      <namePart type="family">Zubiaga</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Arturo</namePart>
      <namePart type="family">Montejo-Ráez</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Aitor</namePart>
      <namePart type="family">Soroa</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">María</namePart>
      <namePart type="given">Teresa</namePart>
      <namePart type="family">Martín-Valdivia</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Marco</namePart>
      <namePart type="family">Guerini</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Rodrigo</namePart>
      <namePart type="family">Agerri</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <originInfo>
      <publisher>Association for Computational Linguistics</publisher>
      <place>
        <placeTerm type="text">Abu Dhabi, UAE</placeTerm>
      </place>
    </originInfo>
    <genre authority="marcgt">conference publication</genre>
  </relatedItem>
  <abstract>The automatic generation of counter-speech (CS) is a critical strategy for addressing hate speech by providing constructive and informed responses. However, existing methods often fail to generate high-quality, impactful, and scalable CS, particularly across diverse linguistic contexts. In this paper, we propose a novel methodology to enhance CS generation by aligning Large Language Models (LLMs) using Supervised Fine-Tuning (SFT) and Direct Preference Optimization (DPO). Our approach leverages DPO to align LLM outputs with human preferences, ensuring contextually appropriate and linguistically adaptable responses. Additionally, we incorporate knowledge grounding to enhance the factual accuracy and relevance of generated CS. Experimental results demonstrate that DPO-aligned models significantly outperform SFT baselines on CS benchmarks while scaling effectively to multiple languages. These findings highlight the potential of preference-based alignment techniques to advance CS generation across varied linguistic settings. The model supervision and alignment are done in English, and the same model is used for reporting metrics across other languages like Basque, Italian, and Spanish.</abstract>
  <identifier type="citekey">wadhwa-etal-2025-northeastern</identifier>
  <location>
    <url>https://aclanthology.org/2025.mcg-1.3/</url>
  </location>
  <part>
    <date>2025-01</date>
    <extent unit="page">
      <start>19</start>
      <end>28</end>
    </extent>
  </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Northeastern Uni at Multilingual Counterspeech Generation: Enhancing Counter Speech Generation with LLM Alignment through Direct Preference Optimization
%A Wadhwa, Sahil
%A Xu, Chengtian
%A Chen, Haoming
%A Mahalingam, Aakash
%A Kar, Akankshya
%A Chaudhary, Divya
%Y Bonaldi, Helena
%Y Vallecillo-Rodríguez, María Estrella
%Y Zubiaga, Irune
%Y Montejo-Ráez, Arturo
%Y Soroa, Aitor
%Y Martín-Valdivia, María Teresa
%Y Guerini, Marco
%Y Agerri, Rodrigo
%S Proceedings of the First Workshop on Multilingual Counterspeech Generation
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F wadhwa-etal-2025-northeastern
%X The automatic generation of counter-speech (CS) is a critical strategy for addressing hate speech by providing constructive and informed responses. However, existing methods often fail to generate high-quality, impactful, and scalable CS, particularly across diverse linguistic contexts. In this paper, we propose a novel methodology to enhance CS generation by aligning Large Language Models (LLMs) using Supervised Fine-Tuning (SFT) and Direct Preference Optimization (DPO). Our approach leverages DPO to align LLM outputs with human preferences, ensuring contextually appropriate and linguistically adaptable responses. Additionally, we incorporate knowledge grounding to enhance the factual accuracy and relevance of generated CS. Experimental results demonstrate that DPO-aligned models significantly outperform SFT baselines on CS benchmarks while scaling effectively to multiple languages. These findings highlight the potential of preference-based alignment techniques to advance CS generation across varied linguistic settings. The model supervision and alignment are done in English, and the same model is used for reporting metrics across other languages like Basque, Italian, and Spanish.
%U https://aclanthology.org/2025.mcg-1.3/
%P 19-28
Markdown (Informal)
[Northeastern Uni at Multilingual Counterspeech Generation: Enhancing Counter Speech Generation with LLM Alignment through Direct Preference Optimization](https://aclanthology.org/2025.mcg-1.3/) (Wadhwa et al., MCG 2025)