@inproceedings{gangal-hovy-2020-bertering,
title = "{BERT}ering {RAMS}: What and How Much does {BERT} Already Know About Event Arguments? - A Study on the {RAMS} Dataset",
author = "Gangal, Varun and
Hovy, Eduard",
editor = "Alishahi, Afra and
Belinkov, Yonatan and
Chrupa{\l}a, Grzegorz and
Hupkes, Dieuwke and
Pinter, Yuval and
Sajjad, Hassan",
booktitle = "Proceedings of the Third BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.blackboxnlp-1.1",
doi = "10.18653/v1/2020.blackboxnlp-1.1",
pages = "1--10",
abstract = "Using the attention map based probing framework from (Clark et al., 2019), we observe that, on the RAMS dataset (Ebner et al., 2020), BERT{'}s attention heads have modest but well above-chance ability to spot event arguments sans any training or domain finetuning, varying from a low of 17.77{\%} for Place to a high of 51.61{\%} for Artifact. Next, we find that linear combinations of these heads, estimated with approx. 11{\%} of available total event argument detection supervision, can push performance well higher for some roles {---} highest two being Victim (68.29{\%} Accuracy) and Artifact (58.82{\%} Accuracy). Furthermore, we investigate how well our methods do for cross-sentence event arguments. We propose a procedure to isolate {``}best heads{''} for cross-sentence argument detection separately of those for intra-sentence arguments. The heads thus estimated have superior cross-sentence performance compared to their jointly estimated equivalents, albeit only under the unrealistic assumption that we already know the argument is present in another sentence. Lastly, we seek to isolate to what extent our numbers stem from lexical frequency based associations between gold arguments and roles. We propose NONCE, a scheme to create adversarial test examples by replacing gold arguments with randomly generated {``}nonce{''} words. We find that learnt linear combinations are robust to NONCE, though individual best heads can be more sensitive.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gangal-hovy-2020-bertering">
<titleInfo>
<title>BERTering RAMS: What and How Much does BERT Already Know About Event Arguments? - A Study on the RAMS Dataset</title>
</titleInfo>
<name type="personal">
<namePart type="given">Varun</namePart>
<namePart type="family">Gangal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eduard</namePart>
<namePart type="family">Hovy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Afra</namePart>
<namePart type="family">Alishahi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yonatan</namePart>
<namePart type="family">Belinkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Grzegorz</namePart>
<namePart type="family">Chrupała</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dieuwke</namePart>
<namePart type="family">Hupkes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuval</namePart>
<namePart type="family">Pinter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hassan</namePart>
<namePart type="family">Sajjad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Using the attention map based probing framework from (Clark et al., 2019), we observe that, on the RAMS dataset (Ebner et al., 2020), BERT’s attention heads have modest but well above-chance ability to spot event arguments sans any training or domain finetuning, varying from a low of 17.77% for Place to a high of 51.61% for Artifact. Next, we find that linear combinations of these heads, estimated with approx. 11% of available total event argument detection supervision, can push performance well higher for some roles — highest two being Victim (68.29% Accuracy) and Artifact (58.82% Accuracy). Furthermore, we investigate how well our methods do for cross-sentence event arguments. We propose a procedure to isolate “best heads” for cross-sentence argument detection separately of those for intra-sentence arguments. The heads thus estimated have superior cross-sentence performance compared to their jointly estimated equivalents, albeit only under the unrealistic assumption that we already know the argument is present in another sentence. Lastly, we seek to isolate to what extent our numbers stem from lexical frequency based associations between gold arguments and roles. We propose NONCE, a scheme to create adversarial test examples by replacing gold arguments with randomly generated “nonce” words. We find that learnt linear combinations are robust to NONCE, though individual best heads can be more sensitive.</abstract>
<identifier type="citekey">gangal-hovy-2020-bertering</identifier>
<identifier type="doi">10.18653/v1/2020.blackboxnlp-1.1</identifier>
<location>
<url>https://aclanthology.org/2020.blackboxnlp-1.1</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>1</start>
<end>10</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BERTering RAMS: What and How Much does BERT Already Know About Event Arguments? - A Study on the RAMS Dataset
%A Gangal, Varun
%A Hovy, Eduard
%Y Alishahi, Afra
%Y Belinkov, Yonatan
%Y Chrupała, Grzegorz
%Y Hupkes, Dieuwke
%Y Pinter, Yuval
%Y Sajjad, Hassan
%S Proceedings of the Third BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F gangal-hovy-2020-bertering
%X Using the attention map based probing framework from (Clark et al., 2019), we observe that, on the RAMS dataset (Ebner et al., 2020), BERT’s attention heads have modest but well above-chance ability to spot event arguments sans any training or domain finetuning, varying from a low of 17.77% for Place to a high of 51.61% for Artifact. Next, we find that linear combinations of these heads, estimated with approx. 11% of available total event argument detection supervision, can push performance well higher for some roles — highest two being Victim (68.29% Accuracy) and Artifact (58.82% Accuracy). Furthermore, we investigate how well our methods do for cross-sentence event arguments. We propose a procedure to isolate “best heads” for cross-sentence argument detection separately of those for intra-sentence arguments. The heads thus estimated have superior cross-sentence performance compared to their jointly estimated equivalents, albeit only under the unrealistic assumption that we already know the argument is present in another sentence. Lastly, we seek to isolate to what extent our numbers stem from lexical frequency based associations between gold arguments and roles. We propose NONCE, a scheme to create adversarial test examples by replacing gold arguments with randomly generated “nonce” words. We find that learnt linear combinations are robust to NONCE, though individual best heads can be more sensitive.
%R 10.18653/v1/2020.blackboxnlp-1.1
%U https://aclanthology.org/2020.blackboxnlp-1.1
%U https://doi.org/10.18653/v1/2020.blackboxnlp-1.1
%P 1-10
Markdown (Informal)
[BERTering RAMS: What and How Much does BERT Already Know About Event Arguments? - A Study on the RAMS Dataset](https://aclanthology.org/2020.blackboxnlp-1.1) (Gangal & Hovy, BlackboxNLP 2020)
ACL
Varun Gangal and Eduard Hovy. 2020. BERTering RAMS: What and How Much does BERT Already Know About Event Arguments? - A Study on the RAMS Dataset. In Proceedings of the Third BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP, pages 1–10, Online. Association for Computational Linguistics.
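
The abstract above outlines two probing steps: reading event arguments directly off individual BERT attention heads, and scoring tokens with a linear combination of heads whose weights are estimated from a small fraction (roughly 11%) of the available supervision. The sketch below is a minimal, hypothetical illustration of that general idea using the Hugging Face transformers API and PyTorch; it is not the authors' released code, and the example sentence, the trigger and candidate token positions, and the uniform head weights are assumptions made purely for demonstration.

```python
# Illustrative sketch of attention-head probing for event arguments.
# Not the authors' implementation; indices and weights below are assumptions.
import torch
from transformers import BertModel, BertTokenizerFast

tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")
model = BertModel.from_pretrained("bert-base-uncased", output_attentions=True)
model.eval()


def attention_maps(text):
    """Stack per-layer attention into one tensor of shape (layers, heads, seq, seq)."""
    enc = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        out = model(**enc)
    return torch.stack([a.squeeze(0) for a in out.attentions])


def probe_single_heads(attn, trigger_idx, candidate_idxs):
    """For every (layer, head), pick the candidate token the trigger attends to most."""
    scores = attn[:, :, trigger_idx, :][:, :, candidate_idxs]  # (layers, heads, n_cands)
    best = scores.argmax(dim=-1)                               # (layers, heads)
    return [[candidate_idxs[j] for j in row] for row in best.tolist()]


def probe_weighted_heads(attn, trigger_idx, candidate_idxs, head_weights):
    """Score candidates with a linear combination over all (layer, head) attention rows."""
    layers, heads, _, seq = attn.shape
    rows = attn[:, :, trigger_idx, :].reshape(layers * heads, seq)  # attention from the trigger
    combined = head_weights @ rows                                  # (seq,)
    return candidate_idxs[int(combined[candidate_idxs].argmax())]


# Toy usage with assumed token positions (check the actual tokenization in practice).
attn = attention_maps("The rebels transported the weapons to the border town.")
trigger_idx = 3          # assumed position of the trigger "transported"
candidate_idxs = [5, 9]  # assumed positions of "weapons" and "town"
n_heads = attn.shape[0] * attn.shape[1]
uniform_w = torch.full((n_heads,), 1.0 / n_heads)  # placeholder; the paper estimates such weights from supervision
print(probe_single_heads(attn, trigger_idx, candidate_idxs)[0][:4])
print(probe_weighted_heads(attn, trigger_idx, candidate_idxs, uniform_w))
```

Here `probe_single_heads` corresponds loosely to the zero-shot per-head numbers quoted in the abstract, while `probe_weighted_heads` mirrors the learned linear combination of heads; a NONCE-style robustness check could be approximated by swapping the candidate argument tokens for random nonce strings before re-running the same probes.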