@inproceedings{wertz-etal-2022-evaluating,
title = "Evaluating Pre-Trained Sentence-{BERT} with Class Embeddings in Active Learning for Multi-Label Text Classification",
author = "Wertz, Lukas and
Bogojeska, Jasmina and
Mirylenka, Katsiaryna and
Kuhn, Jonas",
editor = "He, Yulan and
Ji, Heng and
Li, Sujian and
Liu, Yang and
Chang, Chua-Hui",
booktitle = "Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)",
month = nov,
year = "2022",
address = "Online only",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.aacl-short.45/",
doi = "10.18653/v1/2022.aacl-short.45",
pages = "366--372",
abstract = "The Transformer Language Model is a powerful tool that has been shown to excel at various NLP tasks and has become the de-facto standard solution thanks to its versatility. In this study, we employ pre-trained document embeddings in an Active Learning task to group samples with the same labels in the embedding space on a legal document corpus. We find that the calculated class embeddings are not close to the respective samples and consequently do not partition the embedding space in a meaningful way. In addition, we explore using the class embeddings as an Active Learning strategy with dramatically reduced results compared to all baselines."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wertz-etal-2022-evaluating">
<titleInfo>
<title>Evaluating Pre-Trained Sentence-BERT with Class Embeddings in Active Learning for Multi-Label Text Classification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lukas</namePart>
<namePart type="family">Wertz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jasmina</namePart>
<namePart type="family">Bogojeska</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katsiaryna</namePart>
<namePart type="family">Mirylenka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonas</namePart>
<namePart type="family">Kuhn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yulan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sujian</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chua-Hui</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online only</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The Transformer Language Model is a powerful tool that has been shown to excel at various NLP tasks and has become the de-facto standard solution thanks to its versatility. In this study, we employ pre-trained document embeddings in an Active Learning task to group samples with the same labels in the embedding space on a legal document corpus. We find that the calculated class embeddings are not close to the respective samples and consequently do not partition the embedding space in a meaningful way. In addition, we explore using the class embeddings as an Active Learning strategy with dramatically reduced results compared to all baselines.</abstract>
<identifier type="citekey">wertz-etal-2022-evaluating</identifier>
<identifier type="doi">10.18653/v1/2022.aacl-short.45</identifier>
<location>
<url>https://aclanthology.org/2022.aacl-short.45/</url>
</location>
<part>
<date>2022-11</date>
<extent unit="page">
<start>366</start>
<end>372</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluating Pre-Trained Sentence-BERT with Class Embeddings in Active Learning for Multi-Label Text Classification
%A Wertz, Lukas
%A Bogojeska, Jasmina
%A Mirylenka, Katsiaryna
%A Kuhn, Jonas
%Y He, Yulan
%Y Ji, Heng
%Y Li, Sujian
%Y Liu, Yang
%Y Chang, Chua-Hui
%S Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)
%D 2022
%8 November
%I Association for Computational Linguistics
%C Online only
%F wertz-etal-2022-evaluating
%X The Transformer Language Model is a powerful tool that has been shown to excel at various NLP tasks and has become the de-facto standard solution thanks to its versatility. In this study, we employ pre-trained document embeddings in an Active Learning task to group samples with the same labels in the embedding space on a legal document corpus. We find that the calculated class embeddings are not close to the respective samples and consequently do not partition the embedding space in a meaningful way. In addition, we explore using the class embeddings as an Active Learning strategy with dramatically reduced results compared to all baselines.
%R 10.18653/v1/2022.aacl-short.45
%U https://aclanthology.org/2022.aacl-short.45/
%U https://doi.org/10.18653/v1/2022.aacl-short.45
%P 366-372
Markdown (Informal)
[Evaluating Pre-Trained Sentence-BERT with Class Embeddings in Active Learning for Multi-Label Text Classification](https://aclanthology.org/2022.aacl-short.45/) (Wertz et al., AACL-IJCNLP 2022)
ACL