% ACL Anthology record 2025.ranlp-stud.6: conference paper, pages 44--53.
@inproceedings{schacht-2025-nocs,
  title     = {{NoCs}: A Non-Compound-Stable Splitter for {German} Compounds},
  author    = {Schacht, Carmen},
  editor    = {Velichkov, Boris and
               Nikolova-Koleva, Ivelina and
               Slavcheva, Milena},
  booktitle = {Proceedings of the 9th Student Research Workshop associated with the International Conference Recent Advances in Natural Language Processing},
  month     = sep,
  year      = {2025},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd., Shoumen, Bulgaria},
  url       = {https://aclanthology.org/2025.ranlp-stud.6/},
  pages     = {44--53},
  abstract  = {Compounding{---}the creation of highly complex lexical items through the combination of existing lexemes{---}can be considered one of the most efficient communication phenomenons, though the automatic processing of compound structures{---}especially of multi-constituent compounds{---}poses significant challenges for natural language processing. Existing tools like compound-split (Tuggener, 2016) perform well on compound head detection but are limited in handling long compounds and distinguishing compounds from non-compounds. This paper introduces NoCs (non-compound-stable splitter), a novel Python-based tool that extends the functionality of compound-split by incorporating recursive splitting, non-compound detection, and integration with state-of-the-art linguistic resources. NoCs employs a custom stack-and-buffer mechanism to traverse and decompose compounds robustly, even in cases involving multiple constituents. A large-scale evaluation using adapted GermaNet data shows that NoCs substantially outperforms compound-split in both non-compound identification and the recursive splitting of three- to five-constituent compounds, demonstrating its utility as a reliable resource for compound analysis in German.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="schacht-2025-nocs">
    <!-- The paper itself: title, author, issue date -->
    <titleInfo>
      <title>NoCs: A Non-Compound-Stable Splitter for German Compounds</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Carmen</namePart>
      <namePart type="family">Schacht</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors and publication details -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 9th Student Research Workshop associated with the International Conference Recent Advances in Natural Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Boris</namePart>
        <namePart type="family">Velichkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ivelina</namePart>
        <namePart type="family">Nikolova-Koleva</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Milena</namePart>
        <namePart type="family">Slavcheva</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Compounding—the creation of highly complex lexical items through the combination of existing lexemes—can be considered one of the most efficient communication phenomenons, though the automatic processing of compound structures—especially of multi-constituent compounds—poses significant challenges for natural language processing. Existing tools like compound-split (Tuggener, 2016) perform well on compound head detection but are limited in handling long compounds and distinguishing compounds from non-compounds. This paper introduces NoCs (non-compound-stable splitter), a novel Python-based tool that extends the functionality of compound-split by incorporating recursive splitting, non-compound detection, and integration with state-of-the-art linguistic resources. NoCs employs a custom stack-and-buffer mechanism to traverse and decompose compounds robustly, even in cases involving multiple constituents. A large-scale evaluation using adapted GermaNet data shows that NoCs substantially outperforms compound-split in both non-compound identification and the recursive splitting of three- to five-constituent compounds, demonstrating its utility as a reliable resource for compound analysis in German.</abstract>
    <!-- Identifiers and location of the record -->
    <identifier type="citekey">schacht-2025-nocs</identifier>
    <location>
      <url>https://aclanthology.org/2025.ranlp-stud.6/</url>
    </location>
    <!-- Date and page extent of the paper -->
    <part>
      <date>2025-09</date>
      <extent unit="page">
        <start>44</start>
        <end>53</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T NoCs: A Non-Compound-Stable Splitter for German Compounds
%A Schacht, Carmen
%Y Velichkov, Boris
%Y Nikolova-Koleva, Ivelina
%Y Slavcheva, Milena
%S Proceedings of the 9th Student Research Workshop associated with the International Conference Recent Advances in Natural Language Processing
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F schacht-2025-nocs
%X Compounding—the creation of highly complex lexical items through the combination of existing lexemes—can be considered one of the most efficient communication phenomenons, though the automatic processing of compound structures—especially of multi-constituent compounds—poses significant challenges for natural language processing. Existing tools like compound-split (Tuggener, 2016) perform well on compound head detection but are limited in handling long compounds and distinguishing compounds from non-compounds. This paper introduces NoCs (non-compound-stable splitter), a novel Python-based tool that extends the functionality of compound-split by incorporating recursive splitting, non-compound detection, and integration with state-of-the-art linguistic resources. NoCs employs a custom stack-and-buffer mechanism to traverse and decompose compounds robustly, even in cases involving multiple constituents. A large-scale evaluation using adapted GermaNet data shows that NoCs substantially outperforms compound-split in both non-compound identification and the recursive splitting of three- to five-constituent compounds, demonstrating its utility as a reliable resource for compound analysis in German.
%U https://aclanthology.org/2025.ranlp-stud.6/
%P 44-53
Markdown (Informal)
[NoCs: A Non-Compound-Stable Splitter for German Compounds](https://aclanthology.org/2025.ranlp-stud.6/) (Schacht, RANLP 2025)
ACL
- Carmen Schacht. 2025. NoCs: A Non-Compound-Stable Splitter for German Compounds. In Proceedings of the 9th Student Research Workshop associated with the International Conference Recent Advances in Natural Language Processing, pages 44–53, Varna, Bulgaria. INCOMA Ltd., Shoumen, Bulgaria.