@inproceedings{cavusoglu-coltekin-2026-idiom,
title = "An Idiom Benchmark for {T}urkish",
author = "{\c{C}}avu{\c{s}}o{\u{g}}lu, Ebru and
Coltekin, Cagri",
editor = {Ojha, Atul Kr. and
Mititelu, Verginica Barbu and
Constant, Mathieu and
Stoyanova, Ivelina and
Do{\u{g}}ru{\"o}z, A. Seza and
Rademaker, Alexandre},
booktitle = "Proceedings of the 22nd Workshop on Multiword Expressions ({MWE} 2026)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.mwe-1.12/",
pages = "103--109",
ISBN = "979-8-89176-363-0",
abstract = "Despite recent significant advances, idioms, like other forms of figurative language, present a challenge to natural language processing (NLP). Benchmark corpora are essential for improving the current models on understanding idioms. However, such corpora are only available for a limited set of languages. In this paper, we introduce our ongoing work on a benchmark corpus of Turkish idioms. Our corpus is structured for testing both idiom recognition and idiom understanding. The corpus currently consists of 200 instances with sentences including idiomatic use, their literal paraphrases, similar sentences with no entailment, and non-idiomatic use of the idiomatic expressions when possible. We describe the methodology used to create the corpus, as well as initial experiments with a selection of LLMs."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cavusoglu-coltekin-2026-idiom">
<titleInfo>
<title>An Idiom Benchmark for Turkish</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ebru</namePart>
<namePart type="family">Çavuşoğlu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cagri</namePart>
<namePart type="family">Coltekin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 22nd Workshop on Multiword Expressions (MWE 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Verginica</namePart>
<namePart type="given">Barbu</namePart>
<namePart type="family">Mititelu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mathieu</namePart>
<namePart type="family">Constant</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivelina</namePart>
<namePart type="family">Stoyanova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A.</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexandre</namePart>
<namePart type="family">Rademaker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-363-0</identifier>
</relatedItem>
<abstract>Despite recent significant advances, idioms, like other forms of figurative language, present a challenge to natural language processing (NLP). Benchmark corpora are essential for improving the current models on understanding idioms. However, such corpora are only available for a limited set of languages. In this paper, we introduce our ongoing work on a benchmark corpus of Turkish idioms. Our corpus is structured for testing both idiom recognition and idiom understanding. The corpus currently consists of 200 instances with sentences including idiomatic use, their literal paraphrases, similar sentences with no entailment, and non-idiomatic use of the idiomatic expressions when possible. We describe the methodology used to create the corpus, as well as initial experiments with a selection of LLMs.</abstract>
<identifier type="citekey">cavusoglu-coltekin-2026-idiom</identifier>
<location>
<url>https://aclanthology.org/2026.mwe-1.12/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>103</start>
<end>109</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T An Idiom Benchmark for Turkish
%A Çavuşoğlu, Ebru
%A Coltekin, Cagri
%Y Ojha, Atul Kr.
%Y Mititelu, Verginica Barbu
%Y Constant, Mathieu
%Y Stoyanova, Ivelina
%Y Doğruöz, A. Seza
%Y Rademaker, Alexandre
%S Proceedings of the 22nd Workshop on Multiword Expressions (MWE 2026)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-363-0
%F cavusoglu-coltekin-2026-idiom
%X Despite recent significant advances, idioms, like other forms of figurative language, present a challenge to natural language processing (NLP). Benchmark corpora are essential for improving the current models on understanding idioms. However, such corpora are only available for a limited set of languages. In this paper, we introduce our ongoing work on a benchmark corpus of Turkish idioms. Our corpus is structured for testing both idiom recognition and idiom understanding. The corpus currently consists of 200 instances with sentences including idiomatic use, their literal paraphrases, similar sentences with no entailment, and non-idiomatic use of the idiomatic expressions when possible. We describe the methodology used to create the corpus, as well as initial experiments with a selection of LLMs.
%U https://aclanthology.org/2026.mwe-1.12/
%P 103-109
Markdown (Informal)
[An Idiom Benchmark for Turkish](https://aclanthology.org/2026.mwe-1.12/) (Çavuşoğlu & Coltekin, MWE 2026)
ACL
- Ebru Çavuşoğlu and Cagri Coltekin. 2026. An Idiom Benchmark for Turkish. In Proceedings of the 22nd Workshop on Multiword Expressions (MWE 2026), pages 103–109, Rabat, Morocco. Association for Computational Linguistics.