@inproceedings{reese-smirnova-2025-linguistic,
title = "Linguistic proficiency of humans and {LLM}s in {J}apanese: Effects of task demands and content",
author = "Reese, May Lynn and
Smirnova, Anastasia",
editor = "Wilson, Joshua and
Ormerod, Christopher and
Beiting Parrish, Magdalen",
booktitle = "Proceedings of the Artificial Intelligence in Measurement and Education Conference (AIME-Con): Full Papers",
month = oct,
year = "2025",
address = "Wyndham Grand Pittsburgh, Downtown, Pittsburgh, Pennsylvania, United States",
publisher = "National Council on Measurement in Education (NCME)",
url = "https://aclanthology.org/2025.aimecon-main.22/",
pages = "201--211",
ISBN = "979-8-218-84228-4",
abstract = "We evaluate linguistic proficiency of humans and LLMs on pronoun resolution in Japanese, using the Winograd Schema Challenge dataset. Humans outperform LLMs in the baseline condition, but we find evidence for task demand effects in both humans and LLMs. We also found that LLMs surpass human performance in scenarios referencing US culture, providing strong evidence for content effects."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="reese-smirnova-2025-linguistic">
<titleInfo>
<title>Linguistic proficiency of humans and LLMs in Japanese: Effects of task demands and content</title>
</titleInfo>
<name type="personal">
<namePart type="given">May</namePart>
<namePart type="given">Lynn</namePart>
<namePart type="family">Reese</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anastasia</namePart>
<namePart type="family">Smirnova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Artificial Intelligence in Measurement and Education Conference (AIME-Con): Full Papers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Joshua</namePart>
<namePart type="family">Wilson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Ormerod</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Magdalen</namePart>
<namePart type="family">Beiting Parrish</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>National Council on Measurement in Education (NCME)</publisher>
<place>
<placeTerm type="text">Wyndham Grand Pittsburgh, Downtown, Pittsburgh, Pennsylvania, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-218-84228-4</identifier>
</relatedItem>
<abstract>We evaluate linguistic proficiency of humans and LLMs on pronoun resolution in Japanese, using the Winograd Schema Challenge dataset. Humans outperform LLMs in the baseline condition, but we find evidence for task demand effects in both humans and LLMs. We also found that LLMs surpass human performance in scenarios referencing US culture, providing strong evidence for content effects.</abstract>
<identifier type="citekey">reese-smirnova-2025-linguistic</identifier>
<location>
<url>https://aclanthology.org/2025.aimecon-main.22/</url>
</location>
<part>
<date>2025-10</date>
<extent unit="page">
<start>201</start>
<end>211</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Linguistic proficiency of humans and LLMs in Japanese: Effects of task demands and content
%A Reese, May Lynn
%A Smirnova, Anastasia
%Y Wilson, Joshua
%Y Ormerod, Christopher
%Y Beiting Parrish, Magdalen
%S Proceedings of the Artificial Intelligence in Measurement and Education Conference (AIME-Con): Full Papers
%D 2025
%8 October
%I National Council on Measurement in Education (NCME)
%C Wyndham Grand Pittsburgh, Downtown, Pittsburgh, Pennsylvania, United States
%@ 979-8-218-84228-4
%F reese-smirnova-2025-linguistic
%X We evaluate linguistic proficiency of humans and LLMs on pronoun resolution in Japanese, using the Winograd Schema Challenge dataset. Humans outperform LLMs in the baseline condition, but we find evidence for task demand effects in both humans and LLMs. We also found that LLMs surpass human performance in scenarios referencing US culture, providing strong evidence for content effects.
%U https://aclanthology.org/2025.aimecon-main.22/
%P 201-211
Markdown (Informal)
[Linguistic proficiency of humans and LLMs in Japanese: Effects of task demands and content](https://aclanthology.org/2025.aimecon-main.22/) (Reese & Smirnova, AIME-Con 2025)
ACL