@inproceedings{mondini-etal-2025-uchaguzi,
title = "Uchaguzi-2022: A Dataset of Citizen Reports on the 2022 Kenyan Election",
author = "Mondini, Roberto and
Kotonya, Neema and
Logan IV, Robert L and
Olson, Elizabeth M. and
Oduor Lungati, Angela and
Odongo, Daniel and
Ombasa, Tim and
Lamba, Hemank and
Cahill, Aoife and
Tetreault, Joel and
Jaimes, Alejandro",
editor = "Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Eugenio, Barbara Di and
Schockaert, Steven",
booktitle = "Proceedings of the 31st International Conference on Computational Linguistics",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.coling-main.387/",
pages = "5807--5825",
abstract = "Online reporting platforms have enabled citizens around the world to collectively share their opinions and report in real time on events impacting their local communities. Systematically organizing (e.g., categorizing by attributes) and geotagging large amounts of crowdsourced information is crucial to ensuring that accurate and meaningful insights can be drawn from this data and used by policy makers to bring about positive change. These tasks, however, typically require extensive manual annotation efforts. In this paper we present Uchaguzi-2022, a dataset of 14k categorized and geotagged citizen reports related to the 2022 Kenyan General Election containing mentions of election-related issues such as official misconduct, vote count irregularities, and acts of violence. We use this dataset to investigate whether language models can assist in scalably categorizing and geotagging reports, thus highlighting its potential application in the AI for Social Good space."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mondini-etal-2025-uchaguzi">
<titleInfo>
<title>Uchaguzi-2022: A Dataset of Citizen Reports on the 2022 Kenyan Election</title>
</titleInfo>
<name type="personal">
<namePart type="given">Roberto</namePart>
<namePart type="family">Mondini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Neema</namePart>
<namePart type="family">Kotonya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robert</namePart>
<namePart type="given">L</namePart>
<namePart type="family">Logan IV</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Olson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Angela</namePart>
<namePart type="family">Oduor Lungati</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Odongo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tim</namePart>
<namePart type="family">Ombasa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hemank</namePart>
<namePart type="family">Lamba</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aoife</namePart>
<namePart type="family">Cahill</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joel</namePart>
<namePart type="family">Tetreault</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alejandro</namePart>
<namePart type="family">Jaimes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 31st International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Apidianaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hend</namePart>
<namePart type="family">Al-Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="given">Di</namePart>
<namePart type="family">Eugenio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Schockaert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Online reporting platforms have enabled citizens around the world to collectively share their opinions and report in real time on events impacting their local communities. Systematically organizing (e.g., categorizing by attributes) and geotagging large amounts of crowdsourced information is crucial to ensuring that accurate and meaningful insights can be drawn from this data and used by policy makers to bring about positive change. These tasks, however, typically require extensive manual annotation efforts. In this paper we present Uchaguzi-2022, a dataset of 14k categorized and geotagged citizen reports related to the 2022 Kenyan General Election containing mentions of election-related issues such as official misconduct, vote count irregularities, and acts of violence. We use this dataset to investigate whether language models can assist in scalably categorizing and geotagging reports, thus highlighting its potential application in the AI for Social Good space.</abstract>
<identifier type="citekey">mondini-etal-2025-uchaguzi</identifier>
<location>
<url>https://aclanthology.org/2025.coling-main.387/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>5807</start>
<end>5825</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Uchaguzi-2022: A Dataset of Citizen Reports on the 2022 Kenyan Election
%A Mondini, Roberto
%A Kotonya, Neema
%A Logan IV, Robert L.
%A Olson, Elizabeth M.
%A Oduor Lungati, Angela
%A Odongo, Daniel
%A Ombasa, Tim
%A Lamba, Hemank
%A Cahill, Aoife
%A Tetreault, Joel
%A Jaimes, Alejandro
%Y Rambow, Owen
%Y Wanner, Leo
%Y Apidianaki, Marianna
%Y Al-Khalifa, Hend
%Y Eugenio, Barbara Di
%Y Schockaert, Steven
%S Proceedings of the 31st International Conference on Computational Linguistics
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F mondini-etal-2025-uchaguzi
%X Online reporting platforms have enabled citizens around the world to collectively share their opinions and report in real time on events impacting their local communities. Systematically organizing (e.g., categorizing by attributes) and geotagging large amounts of crowdsourced information is crucial to ensuring that accurate and meaningful insights can be drawn from this data and used by policy makers to bring about positive change. These tasks, however, typically require extensive manual annotation efforts. In this paper we present Uchaguzi-2022, a dataset of 14k categorized and geotagged citizen reports related to the 2022 Kenyan General Election containing mentions of election-related issues such as official misconduct, vote count irregularities, and acts of violence. We use this dataset to investigate whether language models can assist in scalably categorizing and geotagging reports, thus highlighting its potential application in the AI for Social Good space.
%U https://aclanthology.org/2025.coling-main.387/
%P 5807-5825
Markdown (Informal)
[Uchaguzi-2022: A Dataset of Citizen Reports on the 2022 Kenyan Election](https://aclanthology.org/2025.coling-main.387/) (Mondini et al., COLING 2025)
ACL
- Roberto Mondini, Neema Kotonya, Robert L Logan IV, Elizabeth M. Olson, Angela Oduor Lungati, Daniel Odongo, Tim Ombasa, Hemank Lamba, Aoife Cahill, Joel Tetreault, and Alejandro Jaimes. 2025. Uchaguzi-2022: A Dataset of Citizen Reports on the 2022 Kenyan Election. In Proceedings of the 31st International Conference on Computational Linguistics, pages 5807–5825, Abu Dhabi, UAE. Association for Computational Linguistics.