% ACL Anthology record (aclanthology.org, 2022.hcinlp-1.1), workshop paper.
% The tool name in the title is brace-protected so that sentence-casing
% bibliography styles do not lowercase "Builder".
@inproceedings{surdeanu-etal-2022-taxonomy,
  title     = {{Taxonomy Builder}: a Data-driven and User-centric Tool for Streamlining Taxonomy Construction},
  author    = {Surdeanu, Mihai and
               Hungerford, John and
               Chan, Yee Seng and
               MacBride, Jessica and
               Gyori, Benjamin and
               Zupon, Andrew and
               Tang, Zheng and
               Qiu, Haoling and
               Min, Bonan and
               Zverev, Yan and
               Hilverman, Caitlin and
               Thomas, Max and
               Andrews, Walter and
               Alcock, Keith and
               Zhang, Zeyu and
               Reynolds, Michael and
               Bethard, Steven and
               Sharp, Rebecca and
               Laparra, Egoitz},
  editor    = {Blodgett, Su Lin and
               Daum{\'e} III, Hal and
               Madaio, Michael and
               Nenkova, Ani and
               O'Connor, Brendan and
               Wallach, Hanna and
               Yang, Qian},
  booktitle = {Proceedings of the Second Workshop on Bridging Human--Computer Interaction and Natural Language Processing},
  month     = jul,
  year      = {2022},
  address   = {Seattle, Washington},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.hcinlp-1.1},
  doi       = {10.18653/v1/2022.hcinlp-1.1},
  pages     = {1--10},
  abstract  = {An existing domain taxonomy for normalizing content is often assumed when discussing approaches to information extraction, yet often in real-world scenarios there is none. When one does exist, as the information needs shift, it must be continually extended. This is a slow and tedious task, and one which does not scale well. Here we propose an interactive tool that allows a taxonomy to be built or extended \textit{rapidly} and with a \textit{human in the loop} to control precision. We apply insights from text summarization and information extraction to reduce the search space dramatically, then leverage modern pretrained language models to perform contextualized clustering of the remaining concepts to yield candidate nodes for the user to review. We show this allows a user to consider as many as 200 taxonomy concept candidates an hour, to quickly build or extend a taxonomy to better fit information needs.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey surdeanu-etal-2022-taxonomy: 19 author names on the paper; the host relatedItem (proceedings) carries 7 editor names. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="surdeanu-etal-2022-taxonomy">
    <titleInfo>
      <title>Taxonomy Builder: a Data-driven and User-centric Tool for Streamlining Taxonomy Construction</title>
    </titleInfo>
    <!-- Paper authors, in byline order -->
    <name type="personal">
      <namePart type="given">Mihai</namePart>
      <namePart type="family">Surdeanu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">John</namePart>
      <namePart type="family">Hungerford</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yee</namePart>
      <namePart type="given">Seng</namePart>
      <namePart type="family">Chan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jessica</namePart>
      <namePart type="family">MacBride</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Benjamin</namePart>
      <namePart type="family">Gyori</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Andrew</namePart>
      <namePart type="family">Zupon</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zheng</namePart>
      <namePart type="family">Tang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Haoling</namePart>
      <namePart type="family">Qiu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Bonan</namePart>
      <namePart type="family">Min</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yan</namePart>
      <namePart type="family">Zverev</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Caitlin</namePart>
      <namePart type="family">Hilverman</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Max</namePart>
      <namePart type="family">Thomas</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Walter</namePart>
      <namePart type="family">Andrews</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Keith</namePart>
      <namePart type="family">Alcock</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zeyu</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Michael</namePart>
      <namePart type="family">Reynolds</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Steven</namePart>
      <namePart type="family">Bethard</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Rebecca</namePart>
      <namePart type="family">Sharp</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Egoitz</namePart>
      <namePart type="family">Laparra</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host publication: the proceedings volume and its editors -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Second Workshop on Bridging Human–Computer Interaction and Natural Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Su</namePart>
        <namePart type="given">Lin</namePart>
        <namePart type="family">Blodgett</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hal</namePart>
        <namePart type="family">Daumé III</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Michael</namePart>
        <namePart type="family">Madaio</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ani</namePart>
        <namePart type="family">Nenkova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Brendan</namePart>
        <namePart type="family">O’Connor</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hanna</namePart>
        <namePart type="family">Wallach</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Qian</namePart>
        <namePart type="family">Yang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Seattle, Washington</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>An existing domain taxonomy for normalizing content is often assumed when discussing approaches to information extraction, yet often in real-world scenarios there is none. When one does exist, as the information needs shift, it must be continually extended. This is a slow and tedious task, and one which does not scale well. Here we propose an interactive tool that allows a taxonomy to be built or extended rapidly and with a human in the loop to control precision. We apply insights from text summarization and information extraction to reduce the search space dramatically, then leverage modern pretrained language models to perform contextualized clustering of the remaining concepts to yield candidate nodes for the user to review. We show this allows a user to consider as many as 200 taxonomy concept candidates an hour, to quickly build or extend a taxonomy to better fit information needs.</abstract>
    <identifier type="citekey">surdeanu-etal-2022-taxonomy</identifier>
    <identifier type="doi">10.18653/v1/2022.hcinlp-1.1</identifier>
    <location>
      <url>https://aclanthology.org/2022.hcinlp-1.1</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2022-07</date>
      <extent unit="page">
        <start>1</start>
        <end>10</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Taxonomy Builder: a Data-driven and User-centric Tool for Streamlining Taxonomy Construction
%A Surdeanu, Mihai
%A Hungerford, John
%A Chan, Yee Seng
%A MacBride, Jessica
%A Gyori, Benjamin
%A Zupon, Andrew
%A Tang, Zheng
%A Qiu, Haoling
%A Min, Bonan
%A Zverev, Yan
%A Hilverman, Caitlin
%A Thomas, Max
%A Andrews, Walter
%A Alcock, Keith
%A Zhang, Zeyu
%A Reynolds, Michael
%A Bethard, Steven
%A Sharp, Rebecca
%A Laparra, Egoitz
%Y Blodgett, Su Lin
%Y Daumé III, Hal
%Y Madaio, Michael
%Y Nenkova, Ani
%Y O’Connor, Brendan
%Y Wallach, Hanna
%Y Yang, Qian
%S Proceedings of the Second Workshop on Bridging Human–Computer Interaction and Natural Language Processing
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, Washington
%F surdeanu-etal-2022-taxonomy
%X An existing domain taxonomy for normalizing content is often assumed when discussing approaches to information extraction, yet often in real-world scenarios there is none. When one does exist, as the information needs shift, it must be continually extended. This is a slow and tedious task, and one which does not scale well. Here we propose an interactive tool that allows a taxonomy to be built or extended rapidly and with a human in the loop to control precision. We apply insights from text summarization and information extraction to reduce the search space dramatically, then leverage modern pretrained language models to perform contextualized clustering of the remaining concepts to yield candidate nodes for the user to review. We show this allows a user to consider as many as 200 taxonomy concept candidates an hour, to quickly build or extend a taxonomy to better fit information needs.
%R 10.18653/v1/2022.hcinlp-1.1
%U https://aclanthology.org/2022.hcinlp-1.1
%U https://doi.org/10.18653/v1/2022.hcinlp-1.1
%P 1-10
Markdown (Informal)
[Taxonomy Builder: a Data-driven and User-centric Tool for Streamlining Taxonomy Construction](https://aclanthology.org/2022.hcinlp-1.1) (Surdeanu et al., HCINLP 2022)
ACL
- Mihai Surdeanu, John Hungerford, Yee Seng Chan, Jessica MacBride, Benjamin Gyori, Andrew Zupon, Zheng Tang, Haoling Qiu, Bonan Min, Yan Zverev, Caitlin Hilverman, Max Thomas, Walter Andrews, Keith Alcock, Zeyu Zhang, Michael Reynolds, Steven Bethard, Rebecca Sharp, and Egoitz Laparra. 2022. Taxonomy Builder: a Data-driven and User-centric Tool for Streamlining Taxonomy Construction. In Proceedings of the Second Workshop on Bridging Human–Computer Interaction and Natural Language Processing, pages 1–10, Seattle, Washington. Association for Computational Linguistics.