@inproceedings{abdessaied-etal-2022-neuro,
title = "Neuro-Symbolic Visual Dialog",
author = "Abdessaied, Adnen and
B{\^a}ce, Mihai and
Bulling, Andreas",
booktitle = "Proceedings of the 29th International Conference on Computational Linguistics",
month = oct,
year = "2022",
address = "Gyeongju, Republic of Korea",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2022.coling-1.17",
pages = "192--217",
abstract = "We propose Neuro-Symbolic Visual Dialog (NSVD) {---}the first method to combine deep learning and symbolic program execution for multi-round visually-grounded reasoning. NSVD significantly outperforms existing purely-connectionist methods on two key challenges inherent to visual dialog: long-distance co-reference resolution as well as vanishing question-answering performance. We demonstrate the latter by proposing a more realistic and stricter evaluation scheme in which we use predicted answers for the full dialog history when calculating accuracy. We describe two variants of our model and show that using this new scheme, our best model achieves an accuracy of 99.72{\%} on CLEVR-Dialog{---}a relative improvement of more than 10{\%} over the state of the art{---}while only requiring a fraction of training data. Moreover, we demonstrate that our neuro-symbolic models have a higher mean first failure round, are more robust against incomplete dialog histories, and generalise better not only to dialogs that are up to three times longer than those seen during training but also to unseen question types and scenes.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="abdessaied-etal-2022-neuro">
<titleInfo>
<title>Neuro-Symbolic Visual Dialog</title>
</titleInfo>
<name type="personal">
<namePart type="given">Adnen</namePart>
<namePart type="family">Abdessaied</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mihai</namePart>
<namePart type="family">Bâce</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Bulling</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 29th International Conference on Computational Linguistics</title>
</titleInfo>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gyeongju, Republic of Korea</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We propose Neuro-Symbolic Visual Dialog (NSVD) —the first method to combine deep learning and symbolic program execution for multi-round visually-grounded reasoning. NSVD significantly outperforms existing purely-connectionist methods on two key challenges inherent to visual dialog: long-distance co-reference resolution as well as vanishing question-answering performance. We demonstrate the latter by proposing a more realistic and stricter evaluation scheme in which we use predicted answers for the full dialog history when calculating accuracy. We describe two variants of our model and show that using this new scheme, our best model achieves an accuracy of 99.72% on CLEVR-Dialog—a relative improvement of more than 10% over the state of the art—while only requiring a fraction of training data. Moreover, we demonstrate that our neuro-symbolic models have a higher mean first failure round, are more robust against incomplete dialog histories, and generalise better not only to dialogs that are up to three times longer than those seen during training but also to unseen question types and scenes.</abstract>
<identifier type="citekey">abdessaied-etal-2022-neuro</identifier>
<location>
<url>https://aclanthology.org/2022.coling-1.17</url>
</location>
<part>
<date>2022-10</date>
<extent unit="page">
<start>192</start>
<end>217</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Neuro-Symbolic Visual Dialog
%A Abdessaied, Adnen
%A Bâce, Mihai
%A Bulling, Andreas
%S Proceedings of the 29th International Conference on Computational Linguistics
%D 2022
%8 October
%I International Committee on Computational Linguistics
%C Gyeongju, Republic of Korea
%F abdessaied-etal-2022-neuro
%X We propose Neuro-Symbolic Visual Dialog (NSVD) —the first method to combine deep learning and symbolic program execution for multi-round visually-grounded reasoning. NSVD significantly outperforms existing purely-connectionist methods on two key challenges inherent to visual dialog: long-distance co-reference resolution as well as vanishing question-answering performance. We demonstrate the latter by proposing a more realistic and stricter evaluation scheme in which we use predicted answers for the full dialog history when calculating accuracy. We describe two variants of our model and show that using this new scheme, our best model achieves an accuracy of 99.72% on CLEVR-Dialog—a relative improvement of more than 10% over the state of the art—while only requiring a fraction of training data. Moreover, we demonstrate that our neuro-symbolic models have a higher mean first failure round, are more robust against incomplete dialog histories, and generalise better not only to dialogs that are up to three times longer than those seen during training but also to unseen question types and scenes.
%U https://aclanthology.org/2022.coling-1.17
%P 192-217
Markdown (Informal)
[Neuro-Symbolic Visual Dialog](https://aclanthology.org/2022.coling-1.17) (Abdessaied et al., COLING 2022)
ACL
- Adnen Abdessaied, Mihai Bâce, and Andreas Bulling. 2022. Neuro-Symbolic Visual Dialog. In Proceedings of the 29th International Conference on Computational Linguistics, pages 192–217, Gyeongju, Republic of Korea. International Committee on Computational Linguistics.