@inproceedings{kennedy-2025-evidence,
title = "Evidence of Generative Syntax in {LLM}s",
author = "Kennedy, Mary",
editor = "Boleda, Gemma and
Roth, Michael",
booktitle = "Proceedings of the 29th Conference on Computational Natural Language Learning",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.conll-1.25/",
doi = "10.18653/v1/2025.conll-1.25",
pages = "377--396",
ISBN = "979-8-89176-271-8",
abstract = "The syntactic probing literature has been largely limited to shallow structures like dependency trees, which are unable to capture the subtle differences in sub-surface syntactic structures that yield semantic nuances. These structures are captured by theories of syntax like generative syntax, but have not been researched in the LLM literature due to the difficulties in probing these complex structures with many silent, covert nodes. Our work presents a method for overcoming this limitation by deploying Hewitt and Manning{'}s (2019) dependency-trained probe on sentence constructions whose structural representation is identical in a dependency parse, but differs in theoretical syntax. If a pretrained language model has captured the theoretical syntax structure, then the probe{'}s predicted distances should vary in syntactically-predicted ways. Using this methodology and a novel dataset, we find evidence that LLMs have captured syntactic structures far richer than previously realized, indicating LLMs are able to capture the nuanced meanings that result from sub-surface differences in structural form."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="kennedy-2025-evidence">
    <titleInfo>
      <title>Evidence of Generative Syntax in LLMs</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Mary</namePart>
      <namePart type="family">Kennedy</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 29th Conference on Computational Natural Language Learning</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Gemma</namePart>
        <namePart type="family">Boleda</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Michael</namePart>
        <namePart type="family">Roth</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-271-8</identifier>
    </relatedItem>
    <abstract>The syntactic probing literature has been largely limited to shallow structures like dependency trees, which are unable to capture the subtle differences in sub-surface syntactic structures that yield semantic nuances. These structures are captured by theories of syntax like generative syntax, but have not been researched in the LLM literature due to the difficulties in probing these complex structures with many silent, covert nodes. Our work presents a method for overcoming this limitation by deploying Hewitt and Manning’s (2019) dependency-trained probe on sentence constructions whose structural representation is identical in a dependency parse, but differs in theoretical syntax. If a pretrained language model has captured the theoretical syntax structure, then the probe’s predicted distances should vary in syntactically-predicted ways. Using this methodology and a novel dataset, we find evidence that LLMs have captured syntactic structures far richer than previously realized, indicating LLMs are able to capture the nuanced meanings that result from sub-surface differences in structural form.</abstract>
    <identifier type="citekey">kennedy-2025-evidence</identifier>
    <identifier type="doi">10.18653/v1/2025.conll-1.25</identifier>
    <location>
      <url>https://aclanthology.org/2025.conll-1.25/</url>
    </location>
    <part>
      <date>2025-07</date>
      <extent unit="page">
        <start>377</start>
        <end>396</end>
      </extent>
    </part>
  </mods>
</modsCollection>

%0 Conference Proceedings
%T Evidence of Generative Syntax in LLMs
%A Kennedy, Mary
%Y Boleda, Gemma
%Y Roth, Michael
%S Proceedings of the 29th Conference on Computational Natural Language Learning
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-271-8
%F kennedy-2025-evidence
%X The syntactic probing literature has been largely limited to shallow structures like dependency trees, which are unable to capture the subtle differences in sub-surface syntactic structures that yield semantic nuances. These structures are captured by theories of syntax like generative syntax, but have not been researched in the LLM literature due to the difficulties in probing these complex structures with many silent, covert nodes. Our work presents a method for overcoming this limitation by deploying Hewitt and Manning’s (2019) dependency-trained probe on sentence constructions whose structural representation is identical in a dependency parse, but differs in theoretical syntax. If a pretrained language model has captured the theoretical syntax structure, then the probe’s predicted distances should vary in syntactically-predicted ways. Using this methodology and a novel dataset, we find evidence that LLMs have captured syntactic structures far richer than previously realized, indicating LLMs are able to capture the nuanced meanings that result from sub-surface differences in structural form.
%R 10.18653/v1/2025.conll-1.25
%U https://aclanthology.org/2025.conll-1.25/
%U https://doi.org/10.18653/v1/2025.conll-1.25
%P 377-396

Markdown (Informal)
[Evidence of Generative Syntax in LLMs](https://aclanthology.org/2025.conll-1.25/) (Kennedy, CoNLL 2025)

ACL
Mary Kennedy. 2025. Evidence of Generative Syntax in LLMs. In Proceedings of the 29th Conference on Computational Natural Language Learning, pages 377–396, Vienna, Austria. Association for Computational Linguistics.
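
Note on the method described in the abstract: the paper repurposes Hewitt and Manning's (2019) structural probe, which learns a linear projection B so that squared distances between projected hidden states approximate syntactic tree distances. The sketch below is a minimal illustration of that core distance computation only, assuming a PyTorch setup; the function name and tensor shapes are hypothetical and this is not code from the paper.

```python
import torch

def probe_squared_distances(hidden_states: torch.Tensor, B: torch.Tensor) -> torch.Tensor:
    """Predicted squared tree distances for every word pair in one sentence.

    hidden_states: (seq_len, d) hidden vectors from a pretrained language model.
    B:             (k, d) learned probe projection matrix.
    Returns a (seq_len, seq_len) matrix whose (i, j) entry approximates the
    syntactic tree distance between words i and j.
    """
    projected = hidden_states @ B.T                           # (seq_len, k)
    diffs = projected.unsqueeze(1) - projected.unsqueeze(0)   # (seq_len, seq_len, k)
    return (diffs ** 2).sum(dim=-1)                           # squared L2 per pair
```

As the abstract describes, the paper's test compares such predicted distance matrices across sentence constructions whose dependency parses are identical but whose generative-syntax analyses differ; systematic, syntactically predicted differences in the probe's distances are taken as evidence that the model encodes structure richer than the surface dependency tree.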