% ACL Anthology record for the ImplicaTR paper (SIGTURK 2024 workshop proceedings).
% Case-sensitive words in title/booktitle are braced as whole words so that
% sentence-casing bibliography styles keep their capitalisation intact.
@inproceedings{halat-atlamaz-2024-implicatr,
  title     = {{ImplicaTR}: A Granular Dataset for Natural Language Inference and Pragmatic Reasoning in {Turkish}},
  author    = {Halat, Mustafa and
               Atlamaz, {\"U}mit},
  editor    = {Ataman, Duygu and
               Derin, Mehmet Oguz and
               Ivanova, Sardana and
               K{\"o}ksal, Abdullatif and
               S{\"a}lev{\"a}, Jonne and
               Zeyrek, Deniz},
  booktitle = {Proceedings of the First Workshop on Natural Language Processing for {Turkic} Languages ({SIGTURK} 2024)},
  month     = aug,
  year      = {2024},
  address   = {Bangkok, Thailand and Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.sigturk-1.3},
  pages     = {29--41},
  abstract  = {We introduce ImplicaTR, a linguistically informed diagnostic dataset designed to evaluate semantic and pragmatic reasoning capabilities of Natural Language Inference (NLI) models in Turkish. Existing Turkish NLI datasets treat NLI as determining whether a sentence pair represents $\textit{entailment}$, $\textit{contradiction}$, or a $\textit{neutral}$ relation. Such datasets do not distinguish between $\textit{semantic entailment}$ and $\textit{pragmatic implicature}$, which linguists have long recognized as separate inferences types. ImplicaTR addresses this by testing NLI models{'} ability to differentiate between $\textit{entailment}$ and $\textit{implicature}$, thus assessing their pragmatic reasoning skills. The dataset consists of 19,350 semi-automatically generated sentence pairs covering $\textit{implicature, entailment, contradiction,}$ and $\textit{neutral}$ relations. We evaluated various models (BERT, Gemma, Llama-2, and Mistral) on ImplicaTR and found out that these models can reach up to 98{\%} accuracy on semantic and pragmatic reasoning. We also fine tuned various models on subsets of ImplicaTR to test the abilities of NLI models to generalize across unseen implicature contexts. Our results indicate that model performance is highly dependent on the diversity of linguistic expressions within each subset, highlighting a weakness in the abstract generalization capabilities of large language models regarding pragmatic reasoning. We share all the code, models, and the dataset.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record (citekey halat-atlamaz-2024-implicatr). -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="halat-atlamaz-2024-implicatr">
    <!-- Title of the paper itself -->
    <titleInfo>
      <title>ImplicaTR: A Granular Dataset for Natural Language Inference and Pragmatic Reasoning in Turkish</title>
    </titleInfo>
    <!-- Authors, in listed order -->
    <name type="personal">
      <namePart type="given">Mustafa</namePart>
      <namePart type="family">Halat</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ümit</namePart>
      <namePart type="family">Atlamaz</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host publication: the proceedings volume, with its editors and publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the First Workshop on Natural Language Processing for Turkic Languages (SIGTURK 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Duygu</namePart>
        <namePart type="family">Ataman</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mehmet</namePart>
        <namePart type="given">Oguz</namePart>
        <namePart type="family">Derin</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sardana</namePart>
        <namePart type="family">Ivanova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Abdullatif</namePart>
        <namePart type="family">Köksal</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jonne</namePart>
        <namePart type="family">Sälevä</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Deniz</namePart>
        <namePart type="family">Zeyrek</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Bangkok, Thailand and Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We introduce ImplicaTR, a linguistically informed diagnostic dataset designed to evaluate semantic and pragmatic reasoning capabilities of Natural Language Inference (NLI) models in Turkish. Existing Turkish NLI datasets treat NLI as determining whether a sentence pair represents entailment, contradiction, or a neutral relation. Such datasets do not distinguish between semantic entailment and pragmatic implicature, which linguists have long recognized as separate inferences types. ImplicaTR addresses this by testing NLI models’ ability to differentiate between entailment and implicature, thus assessing their pragmatic reasoning skills. The dataset consists of 19,350 semi-automatically generated sentence pairs covering implicature, entailment, contradiction, and neutral relations. We evaluated various models (BERT, Gemma, Llama-2, and Mistral) on ImplicaTR and found out that these models can reach up to 98% accuracy on semantic and pragmatic reasoning. We also fine tuned various models on subsets of ImplicaTR to test the abilities of NLI models to generalize across unseen implicature contexts. Our results indicate that model performance is highly dependent on the diversity of linguistic expressions within each subset, highlighting a weakness in the abstract generalization capabilities of large language models regarding pragmatic reasoning. We share all the code, models, and the dataset.</abstract>
    <identifier type="citekey">halat-atlamaz-2024-implicatr</identifier>
    <location>
      <url>https://aclanthology.org/2024.sigturk-1.3</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2024-08</date>
      <extent unit="page">
        <start>29</start>
        <end>41</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T ImplicaTR: A Granular Dataset for Natural Language Inference and Pragmatic Reasoning in Turkish
%A Halat, Mustafa
%A Atlamaz, Ümit
%Y Ataman, Duygu
%Y Derin, Mehmet Oguz
%Y Ivanova, Sardana
%Y Köksal, Abdullatif
%Y Sälevä, Jonne
%Y Zeyrek, Deniz
%S Proceedings of the First Workshop on Natural Language Processing for Turkic Languages (SIGTURK 2024)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand and Online
%F halat-atlamaz-2024-implicatr
%X We introduce ImplicaTR, a linguistically informed diagnostic dataset designed to evaluate semantic and pragmatic reasoning capabilities of Natural Language Inference (NLI) models in Turkish. Existing Turkish NLI datasets treat NLI as determining whether a sentence pair represents entailment, contradiction, or a neutral relation. Such datasets do not distinguish between semantic entailment and pragmatic implicature, which linguists have long recognized as separate inferences types. ImplicaTR addresses this by testing NLI models’ ability to differentiate between entailment and implicature, thus assessing their pragmatic reasoning skills. The dataset consists of 19,350 semi-automatically generated sentence pairs covering implicature, entailment, contradiction, and neutral relations. We evaluated various models (BERT, Gemma, Llama-2, and Mistral) on ImplicaTR and found out that these models can reach up to 98% accuracy on semantic and pragmatic reasoning. We also fine tuned various models on subsets of ImplicaTR to test the abilities of NLI models to generalize across unseen implicature contexts. Our results indicate that model performance is highly dependent on the diversity of linguistic expressions within each subset, highlighting a weakness in the abstract generalization capabilities of large language models regarding pragmatic reasoning. We share all the code, models, and the dataset.
%U https://aclanthology.org/2024.sigturk-1.3
%P 29-41
Markdown (Informal)
[ImplicaTR: A Granular Dataset for Natural Language Inference and Pragmatic Reasoning in Turkish](https://aclanthology.org/2024.sigturk-1.3) (Halat & Atlamaz, SIGTURK-WS 2024)
ACL