% ACL Anthology record: SIGTURK 2026 workshop paper, pages 220--227.
@inproceedings{karakas-simsek-2026-lemmas,
    title     = {From Lemmas to Dependencies: What Signals Drive Light Verbs Classification?},
    author    = {Karakas, Sercan and
                 {\c{S}}im{\c{s}}ek, Yusuf},
    editor    = {Oflazer, Kemal and
                 K{\"o}ksal, Abdullatif and
                 Varol, Onur},
    booktitle = {Proceedings of the Second Workshop Natural Language Processing for {Turkic} Languages ({SIGTURK} 2026)},
    month     = mar,
    year      = {2026},
    address   = {Rabat, Morocco},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.sigturk-1.18/},
    pages     = {220--227},
    isbn      = {979-8-89176-370-8},
    abstract  = {Light verb constructions (LVCs) are a challenging class of verbal multiword expressions, especially in Turkish, where rich morphology and productive complex predicates create minimal contrasts between idiomatic predicate meanings and literal verb{--}argument uses. This paper asks what signals drive LVC classification by systematically restricting model inputs. Using UD-derived supervision, we compare lemma-driven baselines (lemma TF{--}IDF + Logistic Regression; BERTurk trained on lemma sequences), a grammar-only Logistic Regression over UD morphosyntax (UPOS/DEPREL/MORPH), and a full-input BERTurk baseline. We evaluate on a controlled diagnostic set with Random negatives, lexical controls (NLVC), and LVC positives, reporting split-wise performance to expose decision-boundary behavior. Results show that coarse morphosyntax alone is insufficient for robust LVC detection under controlled contrasts, while lexical identity supports LVC judgments but is sensitive to calibration and normalization choices. Overall, our findings motivate targeted evaluation for Turkish MWEs and highlight that ``lemma-only'' is not a single representation but depends critically on how normalization is instantiated.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="karakas-simsek-2026-lemmas">
<titleInfo>
<title>From Lemmas to Dependencies: What Signals Drive Light Verbs Classification?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sercan</namePart>
<namePart type="family">Karakas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yusuf</namePart>
<namePart type="family">Şimşek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop Natural Language Processing for Turkic Languages (SIGTURK 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kemal</namePart>
<namePart type="family">Oflazer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abdullatif</namePart>
<namePart type="family">Köksal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Onur</namePart>
<namePart type="family">Varol</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-370-8</identifier>
</relatedItem>
<abstract>Light verb constructions (LVCs) are a challenging class of verbal multiword expressions, especially in Turkish, where rich morphology and productive complex predicates create minimal contrasts between idiomatic predicate meanings and literal verb–argument uses. This paper asks what signals drive LVC classification by systematically restricting model inputs. Using UD-derived supervision, we compare lemma-driven baselines (lemma TF–IDF + Logistic Regression; BERTurk trained on lemma sequences), a grammar-only Logistic Regression over UD morphosyntax (UPOS/DEPREL/MORPH), and a full-input BERTurk baseline. We evaluate on a controlled diagnostic set with Random negatives, lexical controls (NLVC), and LVC positives, reporting split-wise performance to expose decision-boundary behavior. Results show that coarse morphosyntax alone is insufficient for robust LVC detection under controlled contrasts, while lexical identity supports LVC judgments but is sensitive to calibration and normalization choices. Overall, our findings motivate targeted evaluation for Turkish MWEs and highlight that “lemma-only” is not a single representation but depends critically on how normalization is instantiated.</abstract>
<identifier type="citekey">karakas-simsek-2026-lemmas</identifier>
<location>
<url>https://aclanthology.org/2026.sigturk-1.18/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>220</start>
<end>227</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T From Lemmas to Dependencies: What Signals Drive Light Verbs Classification?
%A Karakas, Sercan
%A Şimşek, Yusuf
%Y Oflazer, Kemal
%Y Köksal, Abdullatif
%Y Varol, Onur
%S Proceedings of the Second Workshop Natural Language Processing for Turkic Languages (SIGTURK 2026)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-370-8
%F karakas-simsek-2026-lemmas
%X Light verb constructions (LVCs) are a challenging class of verbal multiword expressions, especially in Turkish, where rich morphology and productive complex predicates create minimal contrasts between idiomatic predicate meanings and literal verb–argument uses. This paper asks what signals drive LVC classification by systematically restricting model inputs. Using UD-derived supervision, we compare lemma-driven baselines (lemma TF–IDF + Logistic Regression; BERTurk trained on lemma sequences), a grammar-only Logistic Regression over UD morphosyntax (UPOS/DEPREL/MORPH), and a full-input BERTurk baseline. We evaluate on a controlled diagnostic set with Random negatives, lexical controls (NLVC), and LVC positives, reporting split-wise performance to expose decision-boundary behavior. Results show that coarse morphosyntax alone is insufficient for robust LVC detection under controlled contrasts, while lexical identity supports LVC judgments but is sensitive to calibration and normalization choices. Overall, our findings motivate targeted evaluation for Turkish MWEs and highlight that “lemma-only” is not a single representation but depends critically on how normalization is instantiated.
%U https://aclanthology.org/2026.sigturk-1.18/
%P 220-227
Markdown (Informal)
[From Lemmas to Dependencies: What Signals Drive Light Verbs Classification?](https://aclanthology.org/2026.sigturk-1.18/) (Karakas & Şimşek, SIGTURK 2026)
ACL