% ACL Anthology record for a Findings of ACL 2026 paper (pages 41740--41758).
% Capitalised words in booktitle are brace-protected as whole words so that
% styles which recase titles keep them intact.
@inproceedings{chen-etal-2026-computational,
  title     = {A Computational Method for Measuring Open Codes in Qualitative Analysis},
  author    = {Chen, John and
               Lotsos, Alexandros Nikolaos and
               Cheng, Sihan and
               Zhao, Lexie and
               Zhang, Yanjia and
               Hullman, Jessica and
               Sherin, Bruce and
               Wilensky, Uri and
               Horn, Michael},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.findings-acl.2073/},
  pages     = {41740--41758},
  isbn      = {979-8-89176-395-1},
  abstract  = {Qualitative analysis is critical to understanding human datasets in many social science disciplines. A central method in this process is inductive coding, where researchers identify and interpret codes directly from the datasets themselves. Yet, this exploratory approach poses challenges for meeting methodological expectations (such as ``depth'' and ``variation''), especially as researchers increasingly adopt Generative AI (GAI) for support. Ground-truth-based metrics are insufficient because they contradict the exploratory nature of inductive coding; cluster- or topic-level metrics fail to capture the interpretive, cross-cutting nature of qualitative codes; and manual evaluation can be labor-intensive. This paper presents a theory-informed computational method for measuring inductive coding results from humans and GAI. Our method first merges individual codebooks into an Aggregated Code Space using an LLM-enriched hierarchical clustering algorithm. It then measures each coder{'}s contribution against the merged result using four novel metrics: Coverage, Overlap, Novelty, and Divergence, designed to capture breadth, consensus, unique contribution, and systematic deviation without assuming ground truth. Through two experiments on a human-coded online conversation dataset, we 1) reveal the merging algorithm{'}s impact on metrics; 2) validate the metrics' stability and robustness across multiple runs and different LLMs; and 3) showcase the metrics' ability to diagnose coding issues, such as excessive or irrelevant (hallucinated) codes. We discuss how these metrics should be interpreted in combination and their current limitations. Our work provides a reliable pathway for ensuring methodological rigor in human-AI qualitative analysis.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID is repeated as the citekey identifier below. -->
  <mods ID="chen-etal-2026-computational">
    <titleInfo>
      <title>A Computational Method for Measuring Open Codes in Qualitative Analysis</title>
    </titleInfo>

    <!-- Authors of the paper -->
    <name type="personal">
      <namePart type="given">John</namePart>
      <namePart type="family">Chen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alexandros</namePart>
      <namePart type="given">Nikolaos</namePart>
      <namePart type="family">Lotsos</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sihan</namePart>
      <namePart type="family">Cheng</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Lexie</namePart>
      <namePart type="family">Zhao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yanjia</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jessica</namePart>
      <namePart type="family">Hullman</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Bruce</namePart>
      <namePart type="family">Sherin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Uri</namePart>
      <namePart type="family">Wilensky</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Michael</namePart>
      <namePart type="family">Horn</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2026</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Liakata</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Viviane</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">Moreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiajun</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-395-1</identifier>
    </relatedItem>

    <abstract>Qualitative analysis is critical to understanding human datasets in many social science disciplines. A central method in this process is inductive coding, where researchers identify and interpret codes directly from the datasets themselves. Yet, this exploratory approach poses challenges for meeting methodological expectations (such as “depth” and “variation”), especially as researchers increasingly adopt Generative AI (GAI) for support. Ground-truth-based metrics are insufficient because they contradict the exploratory nature of inductive coding; cluster- or topic-level metrics fail to capture the interpretive, cross-cutting nature of qualitative codes; and manual evaluation can be labor-intensive. This paper presents a theory-informed computational method for measuring inductive coding results from humans and GAI. Our method first merges individual codebooks into an Aggregated Code Space using an LLM-enriched hierarchical clustering algorithm. It then measures each coder’s contribution against the merged result using four novel metrics: Coverage, Overlap, Novelty, and Divergence, designed to capture breadth, consensus, unique contribution, and systematic deviation without assuming ground truth. Through two experiments on a human-coded online conversation dataset, we 1) reveal the merging algorithm’s impact on metrics; 2) validate the metrics’ stability and robustness across multiple runs and different LLMs; and 3) showcase the metrics’ ability to diagnose coding issues, such as excessive or irrelevant (hallucinated) codes. We discuss how these metrics should be interpreted in combination and their current limitations. Our work provides a reliable pathway for ensuring methodological rigor in human-AI qualitative analysis.</abstract>
    <identifier type="citekey">chen-etal-2026-computational</identifier>
    <location>
      <url>https://aclanthology.org/2026.findings-acl.2073/</url>
    </location>

    <!-- Position of the paper within the host volume -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>41740</start>
        <end>41758</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A Computational Method for Measuring Open Codes in Qualitative Analysis
%A Chen, John
%A Lotsos, Alexandros Nikolaos
%A Cheng, Sihan
%A Zhao, Lexie
%A Zhang, Yanjia
%A Hullman, Jessica
%A Sherin, Bruce
%A Wilensky, Uri
%A Horn, Michael
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F chen-etal-2026-computational
%X Qualitative analysis is critical to understanding human datasets in many social science disciplines. A central method in this process is inductive coding, where researchers identify and interpret codes directly from the datasets themselves. Yet, this exploratory approach poses challenges for meeting methodological expectations (such as “depth” and “variation”), especially as researchers increasingly adopt Generative AI (GAI) for support. Ground-truth-based metrics are insufficient because they contradict the exploratory nature of inductive coding; cluster- or topic-level metrics fail to capture the interpretive, cross-cutting nature of qualitative codes; and manual evaluation can be labor-intensive. This paper presents a theory-informed computational method for measuring inductive coding results from humans and GAI. Our method first merges individual codebooks into an Aggregated Code Space using an LLM-enriched hierarchical clustering algorithm. It then measures each coder’s contribution against the merged result using four novel metrics: Coverage, Overlap, Novelty, and Divergence, designed to capture breadth, consensus, unique contribution, and systematic deviation without assuming ground truth. Through two experiments on a human-coded online conversation dataset, we 1) reveal the merging algorithm’s impact on metrics; 2) validate the metrics’ stability and robustness across multiple runs and different LLMs; and 3) showcase the metrics’ ability to diagnose coding issues, such as excessive or irrelevant (hallucinated) codes. We discuss how these metrics should be interpreted in combination and their current limitations. Our work provides a reliable pathway for ensuring methodological rigor in human-AI qualitative analysis.
%U https://aclanthology.org/2026.findings-acl.2073/
%P 41740-41758
Markdown (Informal)
[A Computational Method for Measuring Open Codes in Qualitative Analysis](https://aclanthology.org/2026.findings-acl.2073/) (Chen et al., Findings 2026)
ACL
- John Chen, Alexandros Nikolaos Lotsos, Sihan Cheng, Lexie Zhao, Yanjia Zhang, Jessica Hullman, Bruce Sherin, Uri Wilensky, and Michael Horn. 2026. A Computational Method for Measuring Open Codes in Qualitative Analysis. In Findings of the Association for Computational Linguistics: ACL 2026, pages 41740–41758, San Diego, California, United States. Association for Computational Linguistics.