% Conference paper (single author, three volume editors) from the Findings of EMNLP 2023 volume.
% Inner braces in title/booktitle protect words whose capitalisation must survive sentence-casing styles.
@inproceedings{ryu-2023-plausibility,
  author    = {Ryu, Soo},
  title     = {Plausibility Processing in {Transformer} Language Models: Focusing on the Role of Attention Heads in {GPT}},
  editor    = {Bouamor, Houda and Pino, Juan and Bali, Kalika},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2023},
  month     = dec,
  year      = {2023},
  address   = {Singapore},
  publisher = {Association for Computational Linguistics},
  pages     = {356--369},
  doi       = {10.18653/v1/2023.findings-emnlp.27},
  url       = {https://aclanthology.org/2023.findings-emnlp.27},
  abstract  = {The goal of this paper is to explore how Transformer language models process semantic knowledge, especially regarding the plausibility of noun-verb relations. First, I demonstrate GPT2 exhibits a higher degree of similarity with humans in plausibility processing compared to other Transformer language models. Next, I delve into how knowledge of plausibility is contained within attention heads of GPT2 and how these heads causally contribute to GPT2{'}s plausibility processing ability. Through several experiments, it was found that: i) GPT2 has a number of attention heads that detect plausible noun-verb relationships; ii) these heads collectively contribute to the Transformer{'}s ability to process plausibility, albeit to varying degrees; and iii) attention heads{'} individual performance in detecting plausibility does not necessarily correlate with how much they contribute to GPT2{'}s plausibility processing ability.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS collection holding a single record, ID ryu-2023-plausibility. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="ryu-2023-plausibility">
    <!-- The paper itself: title, sole author, issue date. -->
    <titleInfo>
      <title>Plausibility Processing in Transformer Language Models: Focusing on the Role of Attention Heads in GPT</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Soo</namePart>
      <namePart type="family">Ryu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume containing the paper, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Houda</namePart>
        <namePart type="family">Bouamor</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Juan</namePart>
        <namePart type="family">Pino</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kalika</namePart>
        <namePart type="family">Bali</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Singapore</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The goal of this paper is to explore how Transformer language models process semantic knowledge, especially regarding the plausibility of noun-verb relations. First, I demonstrate GPT2 exhibits a higher degree of similarity with humans in plausibility processing compared to other Transformer language models. Next, I delve into how knowledge of plausibility is contained within attention heads of GPT2 and how these heads causally contribute to GPT2’s plausibility processing ability. Through several experiments, it was found that: i) GPT2 has a number of attention heads that detect plausible noun-verb relationships; ii) these heads collectively contribute to the Transformer’s ability to process plausibility, albeit to varying degrees; and iii) attention heads’ individual performance in detecting plausibility does not necessarily correlate with how much they contribute to GPT2’s plausibility processing ability.</abstract>
    <!-- Identifiers and landing page for the paper. -->
    <identifier type="citekey">ryu-2023-plausibility</identifier>
    <identifier type="doi">10.18653/v1/2023.findings-emnlp.27</identifier>
    <location>
      <url>https://aclanthology.org/2023.findings-emnlp.27</url>
    </location>
    <!-- Date and page extent recorded for this part. -->
    <part>
      <date>2023-12</date>
      <extent unit="page">
        <start>356</start>
        <end>369</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Plausibility Processing in Transformer Language Models: Focusing on the Role of Attention Heads in GPT
%A Ryu, Soo
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F ryu-2023-plausibility
%X The goal of this paper is to explore how Transformer language models process semantic knowledge, especially regarding the plausibility of noun-verb relations. First, I demonstrate GPT2 exhibits a higher degree of similarity with humans in plausibility processing compared to other Transformer language models. Next, I delve into how knowledge of plausibility is contained within attention heads of GPT2 and how these heads causally contribute to GPT2’s plausibility processing ability. Through several experiments, it was found that: i) GPT2 has a number of attention heads that detect plausible noun-verb relationships; ii) these heads collectively contribute to the Transformer’s ability to process plausibility, albeit to varying degrees; and iii) attention heads’ individual performance in detecting plausibility does not necessarily correlate with how much they contribute to GPT2’s plausibility processing ability.
%R 10.18653/v1/2023.findings-emnlp.27
%U https://aclanthology.org/2023.findings-emnlp.27
%U https://doi.org/10.18653/v1/2023.findings-emnlp.27
%P 356-369
Markdown (Informal)
[Plausibility Processing in Transformer Language Models: Focusing on the Role of Attention Heads in GPT](https://aclanthology.org/2023.findings-emnlp.27) (Ryu, Findings 2023)
ACL