@article{koto-etal-2024-indoculture,
title = "{I}ndo{C}ulture: Exploring Geographically Influenced Cultural Commonsense Reasoning Across Eleven {I}ndonesian Provinces",
author = "Koto, Fajri and
Mahendra, Rahmad and
Aisyah, Nurul and
Baldwin, Timothy",
journal = "Transactions of the Association for Computational Linguistics",
volume = "12",
year = "2024",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2024.tacl-1.92/",
doi = "10.1162/tacl_a_00726",
pages = "1703--1719",
abstract = "Although commonsense reasoning is greatly shaped by cultural and geographical factors, previous studies have predominantly centered on cultures grounded in the English language, potentially resulting in an Anglocentric bias. In this paper, we introduce IndoCulture, aimed at understanding the influence of geographical factors on language model reasoning ability, with a specific emphasis on the diverse cultures found within eleven Indonesian provinces. In contrast to prior work that has relied on templates (Yin et al., 2022) and online scrapping (Fung et al., 2024), we create IndoCulture by asking local people to manually develop a cultural context and plausible options, across a set of predefined topics. Evaluation of 27 language models reveals several insights: (1) the open-weight Llama{--}3 is competitive with GPT{--}4, while other open-weight models struggle, with accuracies below 50{\%}; (2) there is a general pattern of models generally performing better for some provinces, such as Bali and West Java, and less well for others; and (3) the inclusion of location context enhances performance, especially for larger models like GPT{--}4, emphasizing the significance of geographical context in commonsense reasoning.1"
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="koto-etal-2024-indoculture">
<titleInfo>
<title>IndoCulture: Exploring Geographically Influenced Cultural Commonsense Reasoning Across Eleven Indonesian Provinces</title>
</titleInfo>
<name type="personal">
<namePart type="given">Fajri</namePart>
<namePart type="family">Koto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rahmad</namePart>
<namePart type="family">Mahendra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nurul</namePart>
<namePart type="family">Aisyah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Timothy</namePart>
<namePart type="family">Baldwin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>Although commonsense reasoning is greatly shaped by cultural and geographical factors, previous studies have predominantly centered on cultures grounded in the English language, potentially resulting in an Anglocentric bias. In this paper, we introduce IndoCulture, aimed at understanding the influence of geographical factors on language model reasoning ability, with a specific emphasis on the diverse cultures found within eleven Indonesian provinces. In contrast to prior work that has relied on templates (Yin et al., 2022) and online scrapping (Fung et al., 2024), we create IndoCulture by asking local people to manually develop a cultural context and plausible options, across a set of predefined topics. Evaluation of 27 language models reveals several insights: (1) the open-weight Llama–3 is competitive with GPT–4, while other open-weight models struggle, with accuracies below 50%; (2) there is a general pattern of models generally performing better for some provinces, such as Bali and West Java, and less well for others; and (3) the inclusion of location context enhances performance, especially for larger models like GPT–4, emphasizing the significance of geographical context in commonsense reasoning.1</abstract>
<identifier type="citekey">koto-etal-2024-indoculture</identifier>
<identifier type="doi">10.1162/tacl_a_00726</identifier>
<location>
<url>https://aclanthology.org/2024.tacl-1.92/</url>
</location>
<part>
<date>2024</date>
<detail type="volume"><number>12</number></detail>
<extent unit="page">
<start>1703</start>
<end>1719</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T IndoCulture: Exploring Geographically Influenced Cultural Commonsense Reasoning Across Eleven Indonesian Provinces
%A Koto, Fajri
%A Mahendra, Rahmad
%A Aisyah, Nurul
%A Baldwin, Timothy
%J Transactions of the Association for Computational Linguistics
%D 2024
%V 12
%I MIT Press
%C Cambridge, MA
%F koto-etal-2024-indoculture
%X Although commonsense reasoning is greatly shaped by cultural and geographical factors, previous studies have predominantly centered on cultures grounded in the English language, potentially resulting in an Anglocentric bias. In this paper, we introduce IndoCulture, aimed at understanding the influence of geographical factors on language model reasoning ability, with a specific emphasis on the diverse cultures found within eleven Indonesian provinces. In contrast to prior work that has relied on templates (Yin et al., 2022) and online scrapping (Fung et al., 2024), we create IndoCulture by asking local people to manually develop a cultural context and plausible options, across a set of predefined topics. Evaluation of 27 language models reveals several insights: (1) the open-weight Llama–3 is competitive with GPT–4, while other open-weight models struggle, with accuracies below 50%; (2) there is a general pattern of models generally performing better for some provinces, such as Bali and West Java, and less well for others; and (3) the inclusion of location context enhances performance, especially for larger models like GPT–4, emphasizing the significance of geographical context in commonsense reasoning.1
%R 10.1162/tacl_a_00726
%U https://aclanthology.org/2024.tacl-1.92/
%U https://doi.org/10.1162/tacl_a_00726
%P 1703-1719
Markdown (Informal)
[IndoCulture: Exploring Geographically Influenced Cultural Commonsense Reasoning Across Eleven Indonesian Provinces](https://aclanthology.org/2024.tacl-1.92/) (Koto et al., TACL 2024)
ACL