@inproceedings{yanaka-etal-2025-jbbq,
title = "{JBBQ}: {J}apanese Bias Benchmark for Analyzing Social Biases in Large Language Models",
author = "Yanaka, Hitomi and
Han, Namgi and
Kumon, Ryoma and
Jie, Lu and
Takeshita, Masashi and
Sekizawa, Ryo and
Kat{\^o}, Taisei and
Arai, Hiromi",
editor = "Fale{\'n}ska, Agnieszka and
Basta, Christine and
Costa-juss{\`a}, Marta and
Sta{\'n}czak, Karolina and
Nozza, Debora",
booktitle = "Proceedings of the 6th Workshop on Gender Bias in Natural Language Processing (GeBNLP)",
month = aug,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.gebnlp-1.1/",
doi = "10.18653/v1/2025.gebnlp-1.1",
pages = "1--17",
ISBN = "979-8-89176-277-0",
abstract = "With the development of large language models (LLMs), social biases in these LLMs have become a pressing issue.Although there are various benchmarks for social biases across languages, the extent to which Japanese LLMs exhibit social biases has not been fully investigated.In this study, we construct the Japanese Bias Benchmark dataset for Question Answering (JBBQ) based on the English bias benchmark BBQ, with analysis of social biases in Japanese LLMs.The results show that while current open Japanese LLMs with more parameters show improved accuracies on JBBQ, their bias scores increase.In addition, prompts with a warning about social biases and chain-of-thought prompting reduce the effect of biases in model outputs, but there is room for improvement in extracting the correct evidence from contexts in Japanese. Our dataset is available at https://github.com/ynklab/JBBQ{\_}data."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yanaka-etal-2025-jbbq">
<titleInfo>
<title>JBBQ: Japanese Bias Benchmark for Analyzing Social Biases in Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hitomi</namePart>
<namePart type="family">Yanaka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Namgi</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryoma</namePart>
<namePart type="family">Kumon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lu</namePart>
<namePart type="family">Jie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Masashi</namePart>
<namePart type="family">Takeshita</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryo</namePart>
<namePart type="family">Sekizawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Taisei</namePart>
<namePart type="family">Katô</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hiromi</namePart>
<namePart type="family">Arai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 6th Workshop on Gender Bias in Natural Language Processing (GeBNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Agnieszka</namePart>
<namePart type="family">Faleńska</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christine</namePart>
<namePart type="family">Basta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marta</namePart>
<namePart type="family">Costa-jussà</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Karolina</namePart>
<namePart type="family">Stańczak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debora</namePart>
<namePart type="family">Nozza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-277-0</identifier>
</relatedItem>
<abstract>With the development of large language models (LLMs), social biases in these LLMs have become a pressing issue.Although there are various benchmarks for social biases across languages, the extent to which Japanese LLMs exhibit social biases has not been fully investigated.In this study, we construct the Japanese Bias Benchmark dataset for Question Answering (JBBQ) based on the English bias benchmark BBQ, with analysis of social biases in Japanese LLMs.The results show that while current open Japanese LLMs with more parameters show improved accuracies on JBBQ, their bias scores increase.In addition, prompts with a warning about social biases and chain-of-thought prompting reduce the effect of biases in model outputs, but there is room for improvement in extracting the correct evidence from contexts in Japanese. Our dataset is available at https://github.com/ynklab/JBBQ_data.</abstract>
<identifier type="citekey">yanaka-etal-2025-jbbq</identifier>
<identifier type="doi">10.18653/v1/2025.gebnlp-1.1</identifier>
<location>
<url>https://aclanthology.org/2025.gebnlp-1.1/</url>
</location>
<part>
<date>2025-08</date>
<extent unit="page">
<start>1</start>
<end>17</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T JBBQ: Japanese Bias Benchmark for Analyzing Social Biases in Large Language Models
%A Yanaka, Hitomi
%A Han, Namgi
%A Kumon, Ryoma
%A Jie, Lu
%A Takeshita, Masashi
%A Sekizawa, Ryo
%A Katô, Taisei
%A Arai, Hiromi
%Y Faleńska, Agnieszka
%Y Basta, Christine
%Y Costa-jussà, Marta
%Y Stańczak, Karolina
%Y Nozza, Debora
%S Proceedings of the 6th Workshop on Gender Bias in Natural Language Processing (GeBNLP)
%D 2025
%8 August
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-277-0
%F yanaka-etal-2025-jbbq
%X With the development of large language models (LLMs), social biases in these LLMs have become a pressing issue.Although there are various benchmarks for social biases across languages, the extent to which Japanese LLMs exhibit social biases has not been fully investigated.In this study, we construct the Japanese Bias Benchmark dataset for Question Answering (JBBQ) based on the English bias benchmark BBQ, with analysis of social biases in Japanese LLMs.The results show that while current open Japanese LLMs with more parameters show improved accuracies on JBBQ, their bias scores increase.In addition, prompts with a warning about social biases and chain-of-thought prompting reduce the effect of biases in model outputs, but there is room for improvement in extracting the correct evidence from contexts in Japanese. Our dataset is available at https://github.com/ynklab/JBBQ_data.
%R 10.18653/v1/2025.gebnlp-1.1
%U https://aclanthology.org/2025.gebnlp-1.1/
%U https://doi.org/10.18653/v1/2025.gebnlp-1.1
%P 1-17
Markdown (Informal)
[JBBQ: Japanese Bias Benchmark for Analyzing Social Biases in Large Language Models](https://aclanthology.org/2025.gebnlp-1.1/) (Yanaka et al., GeBNLP 2025)
ACL
- Hitomi Yanaka, Namgi Han, Ryoma Kumon, Lu Jie, Masashi Takeshita, Ryo Sekizawa, Taisei Katô, and Hiromi Arai. 2025. JBBQ: Japanese Bias Benchmark for Analyzing Social Biases in Large Language Models. In Proceedings of the 6th Workshop on Gender Bias in Natural Language Processing (GeBNLP), pages 1–17, Vienna, Austria. Association for Computational Linguistics.