% Workshop paper record (BibTeX export).
% NOTE(review): `address` appears to hold the event location rather than the
% publisher's city -- confirm this suits the target bibliography style.
@inproceedings{meisenbacher-norlander-2026-building,
  title     = {Building a Custom Taxonomy of {AI} Skills and Tasks from the Ground Up with Job Postings},
  author    = {Meisenbacher, Stephen and
               Norlander, Peter},
  editor    = {Mysore, Sheshera and
               Kumar, Sachin and
               Balachandran, Vidhisha and
               Hayati, Shirley Anugrah and
               Brahman, Faeze and
               Moussa, Hanane Nour and
               Salemi, Alireza},
  booktitle = {Proceedings of the Second Workshop on Customizable {NLP}: Progress and Challenges in Customizing {NLP} for a Domain, Application, Group, or Individual ({CustomNLP4U})},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.customnlp4u-1.11/},
  pages     = {117--130},
  isbn      = {979-8-89176-396-8},
  abstract  = {Utilizing LLMs for automated taxonomy construction presents a clear opportunity for the comprehensive, yet efficient mapping of potentially complex domains. When contending with high volumes of rapidly growing corpora, however, it becomes unclear how to best leverage such data for optimal taxonomy construction. Taking the case of systematizing \emph{AI skills in the workplace}, we use two large-scale job postings corpora to investigate key design decisions for the inclusion (or exclusion) of data points for taxonomy construction. We propose \textbf{TaxonomyBuilder} as a blueprint for our systematic study, with which we evaluate various configurations of custom, data-informed, and hierarchical taxonomies. We demonstrate that \emph{less} data can provide more clarity: filtering inputs to \textbf{TaxonomyBuilder} provides better domain-specific coverage than offering unfiltered inputs to clustering and LLM-enhanced hierarchical taxonomy labeling tools.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record for the paper; nesting is shown by indentation. -->
  <mods ID="meisenbacher-norlander-2026-building">
    <titleInfo>
      <title>Building a Custom Taxonomy of AI Skills and Tasks from the Ground Up with Job Postings</title>
    </titleInfo>
    <!-- Authors -->
    <name type="personal">
      <namePart type="given">Stephen</namePart>
      <namePart type="family">Meisenbacher</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Peter</namePart>
      <namePart type="family">Norlander</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and ISBN -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Second Workshop on Customizable NLP: Progress and Challenges in Customizing NLP for a Domain, Application, Group, or Individual (CustomNLP4U)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Sheshera</namePart>
        <namePart type="family">Mysore</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sachin</namePart>
        <namePart type="family">Kumar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Vidhisha</namePart>
        <namePart type="family">Balachandran</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shirley</namePart>
        <namePart type="given">Anugrah</namePart>
        <namePart type="family">Hayati</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Faeze</namePart>
        <namePart type="family">Brahman</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hanane</namePart>
        <namePart type="given">Nour</namePart>
        <namePart type="family">Moussa</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alireza</namePart>
        <namePart type="family">Salemi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-396-8</identifier>
    </relatedItem>
    <abstract>Utilizing LLMs for automated taxonomy construction presents a clear opportunity for the comprehensive, yet efficient mapping of potentially complex domains. When contending with high volumes of rapidly growing corpora, however, it becomes unclear how to best leverage such data for optimal taxonomy construction. Taking the case of systematizing *AI skills in the workplace*, we use two large-scale job postings corpora to investigate key design decisions for the inclusion (or exclusion) of data points for taxonomy construction. We propose **TaxonomyBuilder** as a blueprint for our systematic study, with which we evaluate various configurations of custom, data-informed, and hierarchical taxonomies. We demonstrate that *less* data can provide more clarity: filtering inputs to **TaxonomyBuilder** provides better domain-specific coverage than offering unfiltered inputs to clustering and LLM-enhanced hierarchical taxonomy labeling tools.</abstract>
    <identifier type="citekey">meisenbacher-norlander-2026-building</identifier>
    <location>
      <url>https://aclanthology.org/2026.customnlp4u-1.11/</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>117</start>
        <end>130</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Building a Custom Taxonomy of AI Skills and Tasks from the Ground Up with Job Postings
%A Meisenbacher, Stephen
%A Norlander, Peter
%Y Mysore, Sheshera
%Y Kumar, Sachin
%Y Balachandran, Vidhisha
%Y Hayati, Shirley Anugrah
%Y Brahman, Faeze
%Y Moussa, Hanane Nour
%Y Salemi, Alireza
%S Proceedings of the Second Workshop on Customizable NLP: Progress and Challenges in Customizing NLP for a Domain, Application, Group, or Individual (CustomNLP4U)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-396-8
%F meisenbacher-norlander-2026-building
%X Utilizing LLMs for automated taxonomy construction presents a clear opportunity for the comprehensive, yet efficient mapping of potentially complex domains. When contending with high volumes of rapidly growing corpora, however, it becomes unclear how to best leverage such data for optimal taxonomy construction. Taking the case of systematizing *AI skills in the workplace*, we use two large-scale job postings corpora to investigate key design decisions for the inclusion (or exclusion) of data points for taxonomy construction. We propose **TaxonomyBuilder** as a blueprint for our systematic study, with which we evaluate various configurations of custom, data-informed, and hierarchical taxonomies. We demonstrate that *less* data can provide more clarity: filtering inputs to **TaxonomyBuilder** provides better domain-specific coverage than offering unfiltered inputs to clustering and LLM-enhanced hierarchical taxonomy labeling tools.
%U https://aclanthology.org/2026.customnlp4u-1.11/
%P 117-130
Markdown (Informal)
[Building a Custom Taxonomy of AI Skills and Tasks from the Ground Up with Job Postings](https://aclanthology.org/2026.customnlp4u-1.11/) (Meisenbacher & Norlander, CustomNLP4U 2026)
ACL