@inproceedings{acquaye-etal-2024-susu,
title = "{S}usu Box or Piggy Bank: Assessing Cultural Commonsense Knowledge between {G}hana and the {US}",
author = "Acquaye, Christabel and
An, Haozhe and
Rudinger, Rachel",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-main.532",
pages = "9483--9502",
abstract = "Recent work has highlighted the culturally-contingent nature of commonsense knowledge. We introduce AMAMMERε, a test set of 525 multiple-choice questions designed to evaluate the commonsense knowledge of English LLMs, relative to the cultural contexts of Ghana and the United States. To create AMAMMERε, we select a set of multiple-choice questions (MCQs) from existing commonsense datasets and rewrite them in a multi-stage process involving surveys of Ghanaian and U.S. participants. In three rounds of surveys, participants from both pools are solicited to (1) write correct and incorrect answer choices, (2) rate individual answer choices on a 5-point Likert scale, and (3) select the best answer choice from the newly-constructed MCQ items, in a final validation step. By engaging participants at multiple stages, our procedure ensures that participant perspectives are incorporated both in the creation and validation of test items, resulting in high levels of agreement within each pool. We evaluate several off-the-shelf English LLMs on AMAMMERε. Uniformly, models prefer answers choices that align with the preferences of U.S. annotators over Ghanaian annotators. Additionally, when test items specify a cultural context (Ghana or the U.S.), models exhibit some ability to adapt, but performance is consistently better in U.S. contexts than Ghanaian. As large resources are devoted to the advancement of English LLMs, our findings underscore the need for culturally adaptable models and evaluations to meet the needs of diverse English-speaking populations around the world.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="acquaye-etal-2024-susu">
<titleInfo>
<title>Susu Box or Piggy Bank: Assessing Cultural Commonsense Knowledge between Ghana and the US</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christabel</namePart>
<namePart type="family">Acquaye</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haozhe</namePart>
<namePart type="family">An</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rachel</namePart>
<namePart type="family">Rudinger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recent work has highlighted the culturally-contingent nature of commonsense knowledge. We introduce AMAMMERε, a test set of 525 multiple-choice questions designed to evaluate the commonsense knowledge of English LLMs, relative to the cultural contexts of Ghana and the United States. To create AMAMMERε, we select a set of multiple-choice questions (MCQs) from existing commonsense datasets and rewrite them in a multi-stage process involving surveys of Ghanaian and U.S. participants. In three rounds of surveys, participants from both pools are solicited to (1) write correct and incorrect answer choices, (2) rate individual answer choices on a 5-point Likert scale, and (3) select the best answer choice from the newly-constructed MCQ items, in a final validation step. By engaging participants at multiple stages, our procedure ensures that participant perspectives are incorporated both in the creation and validation of test items, resulting in high levels of agreement within each pool. We evaluate several off-the-shelf English LLMs on AMAMMERε. Uniformly, models prefer answers choices that align with the preferences of U.S. annotators over Ghanaian annotators. Additionally, when test items specify a cultural context (Ghana or the U.S.), models exhibit some ability to adapt, but performance is consistently better in U.S. contexts than Ghanaian. As large resources are devoted to the advancement of English LLMs, our findings underscore the need for culturally adaptable models and evaluations to meet the needs of diverse English-speaking populations around the world.</abstract>
<identifier type="citekey">acquaye-etal-2024-susu</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.532</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>9483</start>
<end>9502</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Susu Box or Piggy Bank: Assessing Cultural Commonsense Knowledge between Ghana and the US
%A Acquaye, Christabel
%A An, Haozhe
%A Rudinger, Rachel
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F acquaye-etal-2024-susu
%X Recent work has highlighted the culturally-contingent nature of commonsense knowledge. We introduce AMAMMERε, a test set of 525 multiple-choice questions designed to evaluate the commonsense knowledge of English LLMs, relative to the cultural contexts of Ghana and the United States. To create AMAMMERε, we select a set of multiple-choice questions (MCQs) from existing commonsense datasets and rewrite them in a multi-stage process involving surveys of Ghanaian and U.S. participants. In three rounds of surveys, participants from both pools are solicited to (1) write correct and incorrect answer choices, (2) rate individual answer choices on a 5-point Likert scale, and (3) select the best answer choice from the newly-constructed MCQ items, in a final validation step. By engaging participants at multiple stages, our procedure ensures that participant perspectives are incorporated both in the creation and validation of test items, resulting in high levels of agreement within each pool. We evaluate several off-the-shelf English LLMs on AMAMMERε. Uniformly, models prefer answers choices that align with the preferences of U.S. annotators over Ghanaian annotators. Additionally, when test items specify a cultural context (Ghana or the U.S.), models exhibit some ability to adapt, but performance is consistently better in U.S. contexts than Ghanaian. As large resources are devoted to the advancement of English LLMs, our findings underscore the need for culturally adaptable models and evaluations to meet the needs of diverse English-speaking populations around the world.
%U https://aclanthology.org/2024.emnlp-main.532
%P 9483-9502
Markdown (Informal)
[Susu Box or Piggy Bank: Assessing Cultural Commonsense Knowledge between Ghana and the US](https://aclanthology.org/2024.emnlp-main.532) (Acquaye et al., EMNLP 2024)
ACL