@inproceedings{jin-etal-2023-adversarial,
title = "Adversarial Robustness for Large Language {NER} models using Disentanglement and Word Attributions",
author = "Jin, Xiaomeng and
Vinzamuri, Bhanukiran and
Venkatapathy, Sriram and
Ji, Heng and
Natarajan, Pradeep",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-emnlp.830",
doi = "10.18653/v1/2023.findings-emnlp.830",
pages = "12437--12450",
abstract = "Large language models (LLM{'}s) have been widely used for several applications such as question answering, text classification and clustering. While the preliminary results across the aforementioned tasks looks promising, recent work has dived deep into LLM{'}s performing poorly for complex Named Entity Recognition (NER) tasks in comparison to fine-tuned pre-trained language models (PLM{'}s). To enhance wider adoption of LLM{'}s, our paper investigates the robustness of such LLM NER models and its instruction fine-tuned variants to adversarial attacks. In particular, we propose a novel attack which relies on disentanglement and word attribution techniques where the former aids in learning an embedding capturing both entity and non-entity influences separately, and the latter aids in identifying important words across both components. This is in stark contrast to most techniques which primarily leverage non-entity words for perturbations limiting the space being explored to synthesize effective adversarial examples. Adversarial training results based on our method improves the F1 score over original LLM NER model by 8{\%} and 18{\%} on CoNLL-2003 and Ontonotes 5.0 datasets respectively.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jin-etal-2023-adversarial">
<titleInfo>
<title>Adversarial Robustness for Large Language NER models using Disentanglement and Word Attributions</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xiaomeng</namePart>
<namePart type="family">Jin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bhanukiran</namePart>
<namePart type="family">Vinzamuri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sriram</namePart>
<namePart type="family">Venkatapathy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pradeep</namePart>
<namePart type="family">Natarajan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large language models (LLM’s) have been widely used for several applications such as question answering, text classification and clustering. While the preliminary results across the aforementioned tasks looks promising, recent work has dived deep into LLM’s performing poorly for complex Named Entity Recognition (NER) tasks in comparison to fine-tuned pre-trained language models (PLM’s). To enhance wider adoption of LLM’s, our paper investigates the robustness of such LLM NER models and its instruction fine-tuned variants to adversarial attacks. In particular, we propose a novel attack which relies on disentanglement and word attribution techniques where the former aids in learning an embedding capturing both entity and non-entity influences separately, and the latter aids in identifying important words across both components. This is in stark contrast to most techniques which primarily leverage non-entity words for perturbations limiting the space being explored to synthesize effective adversarial examples. Adversarial training results based on our method improves the F1 score over original LLM NER model by 8% and 18% on CoNLL-2003 and Ontonotes 5.0 datasets respectively.</abstract>
<identifier type="citekey">jin-etal-2023-adversarial</identifier>
<identifier type="doi">10.18653/v1/2023.findings-emnlp.830</identifier>
<location>
<url>https://aclanthology.org/2023.findings-emnlp.830</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>12437</start>
<end>12450</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Adversarial Robustness for Large Language NER models using Disentanglement and Word Attributions
%A Jin, Xiaomeng
%A Vinzamuri, Bhanukiran
%A Venkatapathy, Sriram
%A Ji, Heng
%A Natarajan, Pradeep
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F jin-etal-2023-adversarial
%X Large language models (LLM’s) have been widely used for several applications such as question answering, text classification and clustering. While the preliminary results across the aforementioned tasks looks promising, recent work has dived deep into LLM’s performing poorly for complex Named Entity Recognition (NER) tasks in comparison to fine-tuned pre-trained language models (PLM’s). To enhance wider adoption of LLM’s, our paper investigates the robustness of such LLM NER models and its instruction fine-tuned variants to adversarial attacks. In particular, we propose a novel attack which relies on disentanglement and word attribution techniques where the former aids in learning an embedding capturing both entity and non-entity influences separately, and the latter aids in identifying important words across both components. This is in stark contrast to most techniques which primarily leverage non-entity words for perturbations limiting the space being explored to synthesize effective adversarial examples. Adversarial training results based on our method improves the F1 score over original LLM NER model by 8% and 18% on CoNLL-2003 and Ontonotes 5.0 datasets respectively.
%R 10.18653/v1/2023.findings-emnlp.830
%U https://aclanthology.org/2023.findings-emnlp.830
%U https://doi.org/10.18653/v1/2023.findings-emnlp.830
%P 12437-12450
Markdown (Informal)
[Adversarial Robustness for Large Language NER models using Disentanglement and Word Attributions](https://aclanthology.org/2023.findings-emnlp.830) (Jin et al., Findings 2023)
ACL