BibTeX
@inproceedings{roemmele-gordon-2024-test,
title = "From Test-Taking to Test-Making: Examining {LLM} Authoring of Commonsense Assessment Items",
author = "Roemmele, Melissa and
Gordon, Andrew",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.299/",
doi = "10.18653/v1/2024.findings-emnlp.299",
pages = "5193--5203",
abstract = "LLMs can now perform a variety of complex writing tasks. They also excel in answering questions pertaining to natural language inference and commonsense reasoning. Composing these questions is itself a skilled writing task, so in this paper we consider LLMs as authors of commonsense assessment items. We prompt LLMs to generate items in the style of a prominent benchmark for commonsense reasoning, the Choice of Plausible Alternatives (COPA). We examine the outcome according to analyses facilitated by the LLMs and human annotation. We find that LLMs that succeed in answering the original COPA benchmark are also more successful in authoring their own items."
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="roemmele-gordon-2024-test">
    <titleInfo>
      <title>From Test-Taking to Test-Making: Examining LLM Authoring of Commonsense Assessment Items</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Melissa</namePart>
      <namePart type="family">Roemmele</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Andrew</namePart>
      <namePart type="family">Gordon</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yaser</namePart>
        <namePart type="family">Al-Onaizan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mohit</namePart>
        <namePart type="family">Bansal</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yun-Nung</namePart>
        <namePart type="family">Chen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Miami, Florida, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>LLMs can now perform a variety of complex writing tasks. They also excel in answering questions pertaining to natural language inference and commonsense reasoning. Composing these questions is itself a skilled writing task, so in this paper we consider LLMs as authors of commonsense assessment items. We prompt LLMs to generate items in the style of a prominent benchmark for commonsense reasoning, the Choice of Plausible Alternatives (COPA). We examine the outcome according to analyses facilitated by the LLMs and human annotation. We find that LLMs that succeed in answering the original COPA benchmark are also more successful in authoring their own items.</abstract>
    <identifier type="citekey">roemmele-gordon-2024-test</identifier>
    <identifier type="doi">10.18653/v1/2024.findings-emnlp.299</identifier>
    <location>
      <url>https://aclanthology.org/2024.findings-emnlp.299/</url>
    </location>
    <part>
      <date>2024-11</date>
      <extent unit="page">
        <start>5193</start>
        <end>5203</end>
      </extent>
    </part>
  </mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T From Test-Taking to Test-Making: Examining LLM Authoring of Commonsense Assessment Items
%A Roemmele, Melissa
%A Gordon, Andrew
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F roemmele-gordon-2024-test
%X LLMs can now perform a variety of complex writing tasks. They also excel in answering questions pertaining to natural language inference and commonsense reasoning. Composing these questions is itself a skilled writing task, so in this paper we consider LLMs as authors of commonsense assessment items. We prompt LLMs to generate items in the style of a prominent benchmark for commonsense reasoning, the Choice of Plausible Alternatives (COPA). We examine the outcome according to analyses facilitated by the LLMs and human annotation. We find that LLMs that succeed in answering the original COPA benchmark are also more successful in authoring their own items.
%R 10.18653/v1/2024.findings-emnlp.299
%U https://aclanthology.org/2024.findings-emnlp.299/
%U https://doi.org/10.18653/v1/2024.findings-emnlp.299
%P 5193-5203
Markdown (Informal)
[From Test-Taking to Test-Making: Examining LLM Authoring of Commonsense Assessment Items](https://aclanthology.org/2024.findings-emnlp.299/) (Roemmele & Gordon, Findings 2024)
ACL
Melissa Roemmele and Andrew Gordon. 2024. From Test-Taking to Test-Making: Examining LLM Authoring of Commonsense Assessment Items. In Findings of the Association for Computational Linguistics: EMNLP 2024, pages 5193–5203, Miami, Florida, USA. Association for Computational Linguistics.