@inproceedings{sood-etal-2020-interpreting,
title = "Interpreting Attention Models with Human Visual Attention in Machine Reading Comprehension",
author = "Sood, Ekta and
Tannert, Simon and
Frassinelli, Diego and
Bulling, Andreas and
Vu, Ngoc Thang",
editor = "Fern{\'a}ndez, Raquel and
Linzen, Tal",
booktitle = "Proceedings of the 24th Conference on Computational Natural Language Learning",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.conll-1.2",
doi = "10.18653/v1/2020.conll-1.2",
pages = "12--25",
abstract = "While neural networks with attention mechanisms have achieved superior performance on many natural language processing tasks, it remains unclear to which extent learned attention resembles human visual attention. In this paper, we propose a new method that leverages eye-tracking data to investigate the relationship between human visual attention and neural attention in machine reading comprehension. To this end, we introduce a novel 23 participant eye tracking dataset - MQA-RC, in which participants read movie plots and answered pre-defined questions. We compare state of the art networks based on long short-term memory (LSTM), convolutional neural models (CNN) and XLNet Transformer architectures. We find that higher similarity to human attention and performance significantly correlates to the LSTM and CNN models. However, we show this relationship does not hold true for the XLNet models {--} despite the fact that the XLNet performs best on this challenging task. Our results suggest that different architectures seem to learn rather different neural attention strategies and similarity of neural to human attention does not guarantee best performance.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sood-etal-2020-interpreting">
<titleInfo>
<title>Interpreting Attention Models with Human Visual Attention in Machine Reading Comprehension</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ekta</namePart>
<namePart type="family">Sood</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Tannert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diego</namePart>
<namePart type="family">Frassinelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Bulling</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ngoc</namePart>
<namePart type="given">Thang</namePart>
<namePart type="family">Vu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 24th Conference on Computational Natural Language Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Raquel</namePart>
<namePart type="family">Fernández</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tal</namePart>
<namePart type="family">Linzen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>While neural networks with attention mechanisms have achieved superior performance on many natural language processing tasks, it remains unclear to which extent learned attention resembles human visual attention. In this paper, we propose a new method that leverages eye-tracking data to investigate the relationship between human visual attention and neural attention in machine reading comprehension. To this end, we introduce a novel 23 participant eye tracking dataset - MQA-RC, in which participants read movie plots and answered pre-defined questions. We compare state of the art networks based on long short-term memory (LSTM), convolutional neural models (CNN) and XLNet Transformer architectures. We find that higher similarity to human attention and performance significantly correlates to the LSTM and CNN models. However, we show this relationship does not hold true for the XLNet models – despite the fact that the XLNet performs best on this challenging task. Our results suggest that different architectures seem to learn rather different neural attention strategies and similarity of neural to human attention does not guarantee best performance.</abstract>
<identifier type="citekey">sood-etal-2020-interpreting</identifier>
<identifier type="doi">10.18653/v1/2020.conll-1.2</identifier>
<location>
<url>https://aclanthology.org/2020.conll-1.2</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>12</start>
<end>25</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Interpreting Attention Models with Human Visual Attention in Machine Reading Comprehension
%A Sood, Ekta
%A Tannert, Simon
%A Frassinelli, Diego
%A Bulling, Andreas
%A Vu, Ngoc Thang
%Y Fernández, Raquel
%Y Linzen, Tal
%S Proceedings of the 24th Conference on Computational Natural Language Learning
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F sood-etal-2020-interpreting
%X While neural networks with attention mechanisms have achieved superior performance on many natural language processing tasks, it remains unclear to which extent learned attention resembles human visual attention. In this paper, we propose a new method that leverages eye-tracking data to investigate the relationship between human visual attention and neural attention in machine reading comprehension. To this end, we introduce a novel 23 participant eye tracking dataset - MQA-RC, in which participants read movie plots and answered pre-defined questions. We compare state of the art networks based on long short-term memory (LSTM), convolutional neural models (CNN) and XLNet Transformer architectures. We find that higher similarity to human attention and performance significantly correlates to the LSTM and CNN models. However, we show this relationship does not hold true for the XLNet models – despite the fact that the XLNet performs best on this challenging task. Our results suggest that different architectures seem to learn rather different neural attention strategies and similarity of neural to human attention does not guarantee best performance.
%R 10.18653/v1/2020.conll-1.2
%U https://aclanthology.org/2020.conll-1.2
%U https://doi.org/10.18653/v1/2020.conll-1.2
%P 12-25
Markdown (Informal)
[Interpreting Attention Models with Human Visual Attention in Machine Reading Comprehension](https://aclanthology.org/2020.conll-1.2) (Sood et al., CoNLL 2020)
ACL