@inproceedings{qiu-etal-2024-evaluating,
title = "Evaluating Grammatical Well-Formedness in Large Language Models: A Comparative Study with Human Judgments",
author = "Qiu, Zhuang and
Duan, Xufeng and
Cai, Zhenguang",
editor = "Kuribayashi, Tatsuki and
Rambelli, Giulia and
Takmaz, Ece and
Wicke, Philipp and
Oseki, Yohei",
booktitle = "Proceedings of the Workshop on Cognitive Modeling and Computational Linguistics",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.cmcl-1.16",
doi = "10.18653/v1/2024.cmcl-1.16",
pages = "189--198",
abstract = "Research in artificial intelligence has witnessed the surge of large language models (LLMs) demonstrating improved performance in various natural language processing tasks. This has sparked significant discussions about the extent to which large language models emulate human linguistic cognition and usage. This study delves into the representation of grammatical well-formedness in LLMs, which is a critical aspect of linguistic knowledge. In three preregistered experiments, we collected grammaticality judgment data for over 2400 English sentences with varying structures from ChatGPT and Vicuna, comparing them with human judgment data. The results reveal substantial alignment in the assessment of grammatical correctness between LLMs and human judgments, albeit with LLMs often showing more conservative judgments for grammatical correctness or incorrectness.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="qiu-etal-2024-evaluating">
<titleInfo>
<title>Evaluating Grammatical Well-Formedness in Large Language Models: A Comparative Study with Human Judgments</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zhuang</namePart>
<namePart type="family">Qiu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xufeng</namePart>
<namePart type="family">Duan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhenguang</namePart>
<namePart type="family">Cai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Cognitive Modeling and Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tatsuki</namePart>
<namePart type="family">Kuribayashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giulia</namePart>
<namePart type="family">Rambelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ece</namePart>
<namePart type="family">Takmaz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Wicke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yohei</namePart>
<namePart type="family">Oseki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Research in artificial intelligence has witnessed the surge of large language models (LLMs) demonstrating improved performance in various natural language processing tasks. This has sparked significant discussions about the extent to which large language models emulate human linguistic cognition and usage. This study delves into the representation of grammatical well-formedness in LLMs, which is a critical aspect of linguistic knowledge. In three preregistered experiments, we collected grammaticality judgment data for over 2400 English sentences with varying structures from ChatGPT and Vicuna, comparing them with human judgment data. The results reveal substantial alignment in the assessment of grammatical correctness between LLMs and human judgments, albeit with LLMs often showing more conservative judgments for grammatical correctness or incorrectness.</abstract>
<identifier type="citekey">qiu-etal-2024-evaluating</identifier>
<identifier type="doi">10.18653/v1/2024.cmcl-1.16</identifier>
<location>
<url>https://aclanthology.org/2024.cmcl-1.16</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>189</start>
<end>198</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluating Grammatical Well-Formedness in Large Language Models: A Comparative Study with Human Judgments
%A Qiu, Zhuang
%A Duan, Xufeng
%A Cai, Zhenguang
%Y Kuribayashi, Tatsuki
%Y Rambelli, Giulia
%Y Takmaz, Ece
%Y Wicke, Philipp
%Y Oseki, Yohei
%S Proceedings of the Workshop on Cognitive Modeling and Computational Linguistics
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F qiu-etal-2024-evaluating
%X Research in artificial intelligence has witnessed the surge of large language models (LLMs) demonstrating improved performance in various natural language processing tasks. This has sparked significant discussions about the extent to which large language models emulate human linguistic cognition and usage. This study delves into the representation of grammatical well-formedness in LLMs, which is a critical aspect of linguistic knowledge. In three preregistered experiments, we collected grammaticality judgment data for over 2400 English sentences with varying structures from ChatGPT and Vicuna, comparing them with human judgment data. The results reveal substantial alignment in the assessment of grammatical correctness between LLMs and human judgments, albeit with LLMs often showing more conservative judgments for grammatical correctness or incorrectness.
%R 10.18653/v1/2024.cmcl-1.16
%U https://aclanthology.org/2024.cmcl-1.16
%U https://doi.org/10.18653/v1/2024.cmcl-1.16
%P 189-198
Markdown (Informal)
[Evaluating Grammatical Well-Formedness in Large Language Models: A Comparative Study with Human Judgments](https://aclanthology.org/2024.cmcl-1.16) (Qiu et al., CMCL-WS 2024)
ACL