@inproceedings{li-etal-2026-examining,
title = "Examining Large Language Models' form-meaning mappings of information structure constructions in {M}andarin {C}hinese",
author = "Li, Shihui and
Tan, Xiaojuan and
Bloem, Jelke",
editor = "Bonial, Claire and
Berzak, Yevgeni",
booktitle = "Proceedings of the 30th Conference on Computational Natural Language Learning",
month = jul,
year = "2026",
address = "San Diego, California, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.conll-main.37/",
pages = "613--625",
ISBN = "979-8-89176-410-1",
abstract = "Construction Grammar (CxG) knowledge in language models has been extensively studied for English, but remains underexplored in other languages. In Mandarin Chinese, the \textit{ba} (把, disposal) and \textit{bei} (被, passive) constructions are widely used for managing information structure. They foreground topical elements (information structure) and encode systematic form-meaning mappings (CxG), particularly with respect to the semantic role of the object. We probe language models' linguistic competence with these constructions using minimal pairs, constructing a new minimal-pair dataset comprising seven paradigms that target both syntactic constraints and verb{--}construction compatibility. Our results show that it remains a challenge for many models to capture the form-meaning mappings underlying the \textit{ba} construction, although they achieve high accuracy on paradigms driven by surface syntactic cues."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="li-etal-2026-examining">
<titleInfo>
<title>Examining Large Language Models’ form-meaning mappings of information structure constructions in Mandarin Chinese</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shihui</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaojuan</namePart>
<namePart type="family">Tan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jelke</namePart>
<namePart type="family">Bloem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 30th Conference on Computational Natural Language Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Claire</namePart>
<namePart type="family">Bonial</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yevgeni</namePart>
<namePart type="family">Berzak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-410-1</identifier>
</relatedItem>
<abstract>Construction Grammar (CxG) knowledge in language models has been extensively studied for English, but remains underexplored in other languages. In Mandarin Chinese, the ba (把, disposal) and bei (被, passive) constructions are widely used for managing information structure. They foreground topical elements (information structure) and encode systematic form-meaning mappings (CxG), particularly with respect to the semantic role of the object. We probe language models’ linguistic competence with these constructions using minimal pairs, constructing a new minimal-pair dataset comprising seven paradigms that target both syntactic constraints and verb–construction compatibility. Our results show that it remains a challenge for many models to capture the form-meaning mappings underlying the ba construction, although they achieve high accuracy on paradigms driven by surface syntactic cues.</abstract>
<identifier type="citekey">li-etal-2026-examining</identifier>
<location>
<url>https://aclanthology.org/2026.conll-main.37/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>613</start>
<end>625</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Examining Large Language Models’ form-meaning mappings of information structure constructions in Mandarin Chinese
%A Li, Shihui
%A Tan, Xiaojuan
%A Bloem, Jelke
%Y Bonial, Claire
%Y Berzak, Yevgeni
%S Proceedings of the 30th Conference on Computational Natural Language Learning
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-410-1
%F li-etal-2026-examining
%X Construction Grammar (CxG) knowledge in language models has been extensively studied for English, but remains underexplored in other languages. In Mandarin Chinese, the ba (把, disposal) and bei (被, passive) constructions are widely used for managing information structure. They foreground topical elements (information structure) and encode systematic form-meaning mappings (CxG), particularly with respect to the semantic role of the object. We probe language models’ linguistic competence with these constructions using minimal pairs, constructing a new minimal-pair dataset comprising seven paradigms that target both syntactic constraints and verb–construction compatibility. Our results show that it remains a challenge for many models to capture the form-meaning mappings underlying the ba construction, although they achieve high accuracy on paradigms driven by surface syntactic cues.
%U https://aclanthology.org/2026.conll-main.37/
%P 613-625
Markdown (Informal)
[Examining Large Language Models’ form-meaning mappings of information structure constructions in Mandarin Chinese](https://aclanthology.org/2026.conll-main.37/) (Li et al., CoNLL 2026)
ACL