@inproceedings{xiu-etal-2022-logicnmr,
title = "{L}ogic{NMR}: Probing the Non-monotonic Reasoning Ability of Pre-trained Language Models",
author = "Xiu, Yeliang and
Xiao, Zhanhao and
Liu, Yongmei",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2022",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-emnlp.265",
doi = "10.18653/v1/2022.findings-emnlp.265",
pages = "3616--3626",
abstract = "The logical reasoning capabilities of pre-trained language models have recently received much attention. As one of the vital reasoning paradigms, non-monotonic reasoning refers to the fact that conclusions may be invalidated with new information. Existing work has constructed a non-monotonic inference dataset $\delta$-NLI and explored the performance of language models on it. However, the $\delta$-NLI dataset is entangled with commonsense reasoning. In this paper, we explore the pure non-monotonic reasoning ability of pre-trained language models. We build a non-monotonic reasoning benchmark, named LogicNMR, with explicit default rules and iterative updates. In the experimental part, the performance of popular language models on LogicNMR is explored from the perspectives of accuracy, generalization, proof-based traceability and robustness. The experimental results show that even though the fine-tuned language models achieve an accuracy of more than 94.4{\%} on LogicNMR, they perform unsatisfactorily, with a significant drop, in generalization and proof-based traceability.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="xiu-etal-2022-logicnmr">
<titleInfo>
<title>LogicNMR: Probing the Non-monotonic Reasoning Ability of Pre-trained Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yeliang</namePart>
<namePart type="family">Xiu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhanhao</namePart>
<namePart type="family">Xiao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yongmei</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The logical reasoning capabilities of pre-trained language models have recently received much attention. As one of the vital reasoning paradigms, non-monotonic reasoning refers to the fact that conclusions may be invalidated with new information. Existing work has constructed a non-monotonic inference dataset δ-NLI and explored the performance of language models on it. However, the δ-NLI dataset is entangled with commonsense reasoning. In this paper, we explore the pure non-monotonic reasoning ability of pre-trained language models. We build a non-monotonic reasoning benchmark, named LogicNMR, with explicit default rules and iterative updates. In the experimental part, the performance of popular language models on LogicNMR is explored from the perspectives of accuracy, generalization, proof-based traceability and robustness. The experimental results show that even though the fine-tuned language models achieve an accuracy of more than 94.4% on LogicNMR, they perform unsatisfactorily, with a significant drop, in generalization and proof-based traceability.</abstract>
<identifier type="citekey">xiu-etal-2022-logicnmr</identifier>
<identifier type="doi">10.18653/v1/2022.findings-emnlp.265</identifier>
<location>
<url>https://aclanthology.org/2022.findings-emnlp.265</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>3616</start>
<end>3626</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T LogicNMR: Probing the Non-monotonic Reasoning Ability of Pre-trained Language Models
%A Xiu, Yeliang
%A Xiao, Zhanhao
%A Liu, Yongmei
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Findings of the Association for Computational Linguistics: EMNLP 2022
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F xiu-etal-2022-logicnmr
%X The logical reasoning capabilities of pre-trained language models have recently received much attention. As one of the vital reasoning paradigms, non-monotonic reasoning refers to the fact that conclusions may be invalidated with new information. Existing work has constructed a non-monotonic inference dataset δ-NLI and explored the performance of language models on it. However, the δ-NLI dataset is entangled with commonsense reasoning. In this paper, we explore the pure non-monotonic reasoning ability of pre-trained language models. We build a non-monotonic reasoning benchmark, named LogicNMR, with explicit default rules and iterative updates. In the experimental part, the performance of popular language models on LogicNMR is explored from the perspectives of accuracy, generalization, proof-based traceability and robustness. The experimental results show that even though the fine-tuned language models achieve an accuracy of more than 94.4% on LogicNMR, they perform unsatisfactorily, with a significant drop, in generalization and proof-based traceability.
%R 10.18653/v1/2022.findings-emnlp.265
%U https://aclanthology.org/2022.findings-emnlp.265
%U https://doi.org/10.18653/v1/2022.findings-emnlp.265
%P 3616-3626
Markdown (Informal)
[LogicNMR: Probing the Non-monotonic Reasoning Ability of Pre-trained Language Models](https://aclanthology.org/2022.findings-emnlp.265) (Xiu et al., Findings 2022)
ACL