@inproceedings{mosteiro-blasi-2025-word,
title = "Word boundaries and the morphology-syntax trade-off",
author = "Mosteiro, Pablo and
Blasi, Dami{\'a}n",
editor = "Yagi, Sane and
Sawalha, Majdi and
Shawar, Bayan Abu and
AlShdaifat, Abdallah T. and
Abbas, Norhan and
Organizers",
booktitle = "Proceedings of the New Horizons in Computational Linguistics for Religious Texts",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.clrel-1.9/",
pages = "86--93",
abstract = "This paper investigates the relationship between syntax and morphology in natural languages, focusing on the relation between the amount of information stored by word structure on the one hand, and word order on the other. In previous work, a trade-off between these was observed in a large corpus covering over a thousand languages, suggesting a dynamic {\textquoteleft}division of labor{\textquoteright} between syntax and morphology, as well as yielding proof for the efficient coding of information in language. In contrast, we find that the trade-off can be explained by differing conventions in orthographic word boundaries. We do so by redefining word boundaries within languages either by increasing or decreasing the domain of wordhood implied by orthographic words. Namely, we paste frequent word-pairs together and split words into their frequently occurring component parts. These interventions yield the same trade-off within languages across word domains as what is observed across languages in the orthographic word domain. This allows us to conclude that the original claims on syntax-morphology trade-offs were spurious and that, more importantly, there does not seem to exist a privileged wordhood domain where within- and across-word regularities yield an optimal or optimized amount of information."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mosteiro-blasi-2025-word">
<titleInfo>
<title>Word boundaries and the morphology-syntax trade-off</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pablo</namePart>
<namePart type="family">Mosteiro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Damián</namePart>
<namePart type="family">Blasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the New Horizons in Computational Linguistics for Religious Texts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sane</namePart>
<namePart type="family">Yagi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Majdi</namePart>
<namePart type="family">Sawalha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bayan</namePart>
<namePart type="given">Abu</namePart>
<namePart type="family">Shawar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abdallah</namePart>
<namePart type="given">T</namePart>
<namePart type="family">AlShdaifat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Norhan</namePart>
<namePart type="family">Abbas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name>
<namePart>Organizers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper investigates the relationship between syntax and morphology in natural languages, focusing on the relation between the amount of information stored by word structure on the one hand, and word order on the other. In previous work, a trade-off between these was observed in a large corpus covering over a thousand languages, suggesting a dynamic ‘division of labor’ between syntax and morphology, as well as yielding proof for the efficient coding of information in language. In contrast, we find that the trade-off can be explained by differing conventions in orthographic word boundaries. We do so by redefining word boundaries within languages either by increasing or decreasing the domain of wordhood implied by orthographic words. Namely, we paste frequent word-pairs together and split words into their frequently occurring component parts. These interventions yield the same trade-off within languages across word domains as what is observed across languages in the orthographic word domain. This allows us to conclude that the original claims on syntax-morphology trade-offs were spurious and that, more importantly, there does not seem to exist a privileged wordhood domain where within- and across-word regularities yield an optimal or optimized amount of information.</abstract>
<identifier type="citekey">mosteiro-blasi-2025-word</identifier>
<location>
<url>https://aclanthology.org/2025.clrel-1.9/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>86</start>
<end>93</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Word boundaries and the morphology-syntax trade-off
%A Mosteiro, Pablo
%A Blasi, Damián
%Y Yagi, Sane
%Y Sawalha, Majdi
%Y Shawar, Bayan Abu
%Y AlShdaifat, Abdallah T.
%Y Abbas, Norhan
%E Organizers
%S Proceedings of the New Horizons in Computational Linguistics for Religious Texts
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F mosteiro-blasi-2025-word
%X This paper investigates the relationship between syntax and morphology in natural languages, focusing on the relation between the amount of information stored by word structure on the one hand, and word order on the other. In previous work, a trade-off between these was observed in a large corpus covering over a thousand languages, suggesting a dynamic ‘division of labor’ between syntax and morphology, as well as yielding proof for the efficient coding of information in language. In contrast, we find that the trade-off can be explained by differing conventions in orthographic word boundaries. We do so by redefining word boundaries within languages either by increasing or decreasing the domain of wordhood implied by orthographic words. Namely, we paste frequent word-pairs together and split words into their frequently occurring component parts. These interventions yield the same trade-off within languages across word domains as what is observed across languages in the orthographic word domain. This allows us to conclude that the original claims on syntax-morphology trade-offs were spurious and that, more importantly, there does not seem to exist a privileged wordhood domain where within- and across-word regularities yield an optimal or optimized amount of information.
%U https://aclanthology.org/2025.clrel-1.9/
%P 86-93
Markdown (Informal)
[Word boundaries and the morphology-syntax trade-off](https://aclanthology.org/2025.clrel-1.9/) (Mosteiro & Blasi, CLRel 2025)
ACL