@comment{Conference paper record. The same reference is repeated after this entry in MODS XML and EndNote form.}
@inproceedings{krsnik-dobrovoljc-2025-stark,
  title     = {{STARK}: A Toolkit for Dependency (Sub)Tree Extraction and Analysis},
  author    = {Krsnik, Luka and
               Dobrovoljc, Kaja},
  editor    = {Jablotschkin, Sarah and
               K{\"u}bler, Sandra and
               Zinsmeister, Heike},
  booktitle = {Proceedings of the 23rd International Workshop on Treebanks and Linguistic Theories ({TLT}, {SyntaxFest} 2025)},
  month     = aug,
  year      = {2025},
  address   = {Ljubljana, Slovenia},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.tlt-1.5/},
  pages     = {44--51},
  isbn      = {979-8-89176-291-6},
  abstract  = {We present STARK, a lightweight and flexible Python toolkit for extracting and analyzing syntactic (sub)trees from dependency-parsed corpora. By systematically slicing each sentence into interpretable syntactic units based on configurable parameters, STARK enables bottom-up, data-driven exploration of syntactic patterns at multiple levels of abstraction{---}from fully lexicalized constructions to general structural templates. It supports any CoNLL-U-formatted corpus and is available as a command-line tool, Python library, and interactive online demo, ensuring seamless integration into both exploratory and large-scale corpus workflows. We illustrate its functionality through case studies in noun phrase analysis, multiword expression identification, and syntactic variation across corpora, demonstrating its utility for a wide range of corpus-driven syntactic investigations.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- One MODS v3 record: the paper itself, with the host proceedings volume described in relatedItem. -->
  <mods ID="krsnik-dobrovoljc-2025-stark">
    <titleInfo>
      <title>STARK: A Toolkit for Dependency (Sub)Tree Extraction and Analysis</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Luka</namePart>
      <namePart type="family">Krsnik</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kaja</namePart>
      <namePart type="family">Dobrovoljc</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 23rd International Workshop on Treebanks and Linguistic Theories (TLT, SyntaxFest 2025)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Sarah</namePart>
        <namePart type="family">Jablotschkin</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sandra</namePart>
        <namePart type="family">Kübler</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Heike</namePart>
        <namePart type="family">Zinsmeister</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Ljubljana, Slovenia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-291-6</identifier>
    </relatedItem>
    <abstract>We present STARK, a lightweight and flexible Python toolkit for extracting and analyzing syntactic (sub)trees from dependency-parsed corpora. By systematically slicing each sentence into interpretable syntactic units based on configurable parameters, STARK enables bottom-up, data-driven exploration of syntactic patterns at multiple levels of abstraction—from fully lexicalized constructions to general structural templates. It supports any CoNLL-U-formatted corpus and is available as a command-line tool, Python library, and interactive online demo, ensuring seamless integration into both exploratory and large-scale corpus workflows. We illustrate its functionality through case studies in noun phrase analysis, multiword expression identification, and syntactic variation across corpora, demonstrating its utility for a wide range of corpus-driven syntactic investigations.</abstract>
    <identifier type="citekey">krsnik-dobrovoljc-2025-stark</identifier>
    <location>
      <url>https://aclanthology.org/2025.tlt-1.5/</url>
    </location>
    <part>
      <date>2025-08</date>
      <extent unit="page">
        <start>44</start>
        <end>51</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T STARK: A Toolkit for Dependency (Sub)Tree Extraction and Analysis
%A Krsnik, Luka
%A Dobrovoljc, Kaja
%Y Jablotschkin, Sarah
%Y Kübler, Sandra
%Y Zinsmeister, Heike
%S Proceedings of the 23rd International Workshop on Treebanks and Linguistic Theories (TLT, SyntaxFest 2025)
%D 2025
%8 August
%I Association for Computational Linguistics
%C Ljubljana, Slovenia
%@ 979-8-89176-291-6
%F krsnik-dobrovoljc-2025-stark
%X We present STARK, a lightweight and flexible Python toolkit for extracting and analyzing syntactic (sub)trees from dependency-parsed corpora. By systematically slicing each sentence into interpretable syntactic units based on configurable parameters, STARK enables bottom-up, data-driven exploration of syntactic patterns at multiple levels of abstraction—from fully lexicalized constructions to general structural templates. It supports any CoNLL-U-formatted corpus and is available as a command-line tool, Python library, and interactive online demo, ensuring seamless integration into both exploratory and large-scale corpus workflows. We illustrate its functionality through case studies in noun phrase analysis, multiword expression identification, and syntactic variation across corpora, demonstrating its utility for a wide range of corpus-driven syntactic investigations.
%U https://aclanthology.org/2025.tlt-1.5/
%P 44-51
Markdown (Informal)
[STARK: A Toolkit for Dependency (Sub)Tree Extraction and Analysis](https://aclanthology.org/2025.tlt-1.5/) (Krsnik & Dobrovoljc, TLT-SyntaxFest 2025)
ACL