@inproceedings{abzianidze-etal-2023-spacenli,
title = "{S}pace{NLI}: Evaluating the Consistency of Predicting Inferences In Space",
author = "Abzianidze, Lasha and
Zwarts, Joost and
Winter, Yoad",
editor = "Chatzikyriakidis, Stergios and
de Paiva, Valeria",
booktitle = "Proceedings of the 4th Natural Logic Meets Machine Learning Workshop",
month = jun,
year = "2023",
address = "Nancy, France",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.naloma-1.2",
pages = "12--24",
abstract = "While many natural language inference (NLI) datasets target certain semantic phenomena, e.g., negation, tense {\&} aspect, monotonicity, and presupposition, to the best of our knowledge, there is no NLI dataset that involves diverse types of spatial expressions and reasoning. We fill this gap by semi-automatically creating an NLI dataset for spatial reasoning, called SpaceNLI. The data samples are automatically generated from a curated set of reasoning patterns (see Figure 1), where the patterns are annotated with inference labels by experts. We test several SOTA NLI systems on SpaceNLI to gauge the complexity of the dataset and the system{'}s capacity for spatial reasoning. Moreover, we introduce a \textit{Pattern Accuracy} and argue that it is a more reliable and stricter measure than the accuracy for evaluating a system{'}s performance on pattern-based generated data samples. Based on the evaluation results we find that the systems obtain moderate results on the spatial NLI problems but lack consistency per inference pattern. The results also reveal that non-projective spatial inferences (especially due to the {``}between{''} preposition) are the most challenging ones.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="abzianidze-etal-2023-spacenli">
<titleInfo>
<title>SpaceNLI: Evaluating the Consistency of Predicting Inferences In Space</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lasha</namePart>
<namePart type="family">Abzianidze</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joost</namePart>
<namePart type="family">Zwarts</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yoad</namePart>
<namePart type="family">Winter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Natural Logic Meets Machine Learning Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Stergios</namePart>
<namePart type="family">Chatzikyriakidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Valeria</namePart>
<namePart type="family">de Paiva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Nancy, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>While many natural language inference (NLI) datasets target certain semantic phenomena, e.g., negation, tense &amp; aspect, monotonicity, and presupposition, to the best of our knowledge, there is no NLI dataset that involves diverse types of spatial expressions and reasoning. We fill this gap by semi-automatically creating an NLI dataset for spatial reasoning, called SpaceNLI. The data samples are automatically generated from a curated set of reasoning patterns (see Figure 1), where the patterns are annotated with inference labels by experts. We test several SOTA NLI systems on SpaceNLI to gauge the complexity of the dataset and the systems’ capacity for spatial reasoning. Moreover, we introduce Pattern Accuracy and argue that it is a more reliable and stricter measure than accuracy for evaluating a system’s performance on data samples generated from patterns. Based on the evaluation results, we find that the systems obtain moderate results on the spatial NLI problems but lack consistency per inference pattern. The results also reveal that non-projective spatial inferences (especially due to the “between” preposition) are the most challenging ones.</abstract>
<identifier type="citekey">abzianidze-etal-2023-spacenli</identifier>
<location>
<url>https://aclanthology.org/2023.naloma-1.2</url>
</location>
<part>
<date>2023-06</date>
<extent unit="page">
<start>12</start>
<end>24</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SpaceNLI: Evaluating the Consistency of Predicting Inferences In Space
%A Abzianidze, Lasha
%A Zwarts, Joost
%A Winter, Yoad
%Y Chatzikyriakidis, Stergios
%Y de Paiva, Valeria
%S Proceedings of the 4th Natural Logic Meets Machine Learning Workshop
%D 2023
%8 June
%I Association for Computational Linguistics
%C Nancy, France
%F abzianidze-etal-2023-spacenli
%X While many natural language inference (NLI) datasets target certain semantic phenomena, e.g., negation, tense & aspect, monotonicity, and presupposition, to the best of our knowledge, there is no NLI dataset that involves diverse types of spatial expressions and reasoning. We fill this gap by semi-automatically creating an NLI dataset for spatial reasoning, called SpaceNLI. The data samples are automatically generated from a curated set of reasoning patterns (see Figure 1), where the patterns are annotated with inference labels by experts. We test several SOTA NLI systems on SpaceNLI to gauge the complexity of the dataset and the systems’ capacity for spatial reasoning. Moreover, we introduce Pattern Accuracy and argue that it is a more reliable and stricter measure than accuracy for evaluating a system’s performance on data samples generated from patterns. Based on the evaluation results, we find that the systems obtain moderate results on the spatial NLI problems but lack consistency per inference pattern. The results also reveal that non-projective spatial inferences (especially due to the “between” preposition) are the most challenging ones.
%U https://aclanthology.org/2023.naloma-1.2
%P 12-24
Markdown (Informal)
[SpaceNLI: Evaluating the Consistency of Predicting Inferences In Space](https://aclanthology.org/2023.naloma-1.2) (Abzianidze et al., NALOMA-WS 2023)
ACL
Lasha Abzianidze, Joost Zwarts, and Yoad Winter. 2023. SpaceNLI: Evaluating the Consistency of Predicting Inferences In Space. In Proceedings of the 4th Natural Logic Meets Machine Learning Workshop, pages 12–24, Nancy, France. Association for Computational Linguistics.