@inproceedings{wehnert-etal-2021-hotter-hierarchical,
    title     = {{HOTTER}: {Hierarchical} Optimal Topic Transport with Explanatory Context Representations},
    author    = {Wehnert, Sabine and
      Scheel, Christian and
      Szak{\'a}cs-Behling, Simona and
      Niel{\"a}nder, Maret and
      Mielke, Patrick and
      De Luca, Ernesto William},
    booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2021},
    month     = nov,
    year      = {2021},
    address   = {Punta Cana, Dominican Republic},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2021.findings-emnlp.418},
    doi       = {10.18653/v1/2021.findings-emnlp.418},
    pages     = {4856--4866},
    abstract  = {Natural language processing (NLP) is often the backbone of today{'}s systems for user interactions, information retrieval and others. Many of such NLP applications rely on specialized learned representations (e.g. neural word embeddings, topic models) that improve the ability to reason about the relationships between documents of a corpus. Paired with the progress in learned representations, the similarity metrics used to compare representations of documents are also evolving, with numerous proposals differing in computation time or interpretability. In this paper we propose an extension to a specific emerging hybrid document distance metric which combines topic models and word embeddings: the Hierarchical Optimal Topic Transport (HOTT). In specific, we extend HOTT by using context-enhanced word representations. We provide a validation of our approach on public datasets, using the language model BERT for a document categorization task. Results indicate competitive performance of the extended HOTT metric. We furthermore apply the HOTT metric and its extension to support educational media research, with a retrieval task of matching topics in German curricula to educational textbooks passages, along with offering an auxiliary explanatory document representing the dominant topic of the retrieved document. In a user study, our explanation method is preferred over regular topic keywords.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wehnert-etal-2021-hotter-hierarchical">
<titleInfo>
<title>HOTTER: Hierarchical Optimal Topic Transport with Explanatory Context Representations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sabine</namePart>
<namePart type="family">Wehnert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christian</namePart>
<namePart type="family">Scheel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simona</namePart>
<namePart type="family">Szakács-Behling</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maret</namePart>
<namePart type="family">Nieländer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Patrick</namePart>
<namePart type="family">Mielke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ernesto</namePart>
<namePart type="given">William</namePart>
<namePart type="family">De Luca</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2021</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Punta Cana, Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Natural language processing (NLP) is often the backbone of today’s systems for user interactions, information retrieval and others. Many of such NLP applications rely on specialized learned representations (e.g. neural word embeddings, topic models) that improve the ability to reason about the relationships between documents of a corpus. Paired with the progress in learned representations, the similarity metrics used to compare representations of documents are also evolving, with numerous proposals differing in computation time or interpretability. In this paper we propose an extension to a specific emerging hybrid document distance metric which combines topic models and word embeddings: the Hierarchical Optimal Topic Transport (HOTT). In specific, we extend HOTT by using context-enhanced word representations. We provide a validation of our approach on public datasets, using the language model BERT for a document categorization task. Results indicate competitive performance of the extended HOTT metric. We furthermore apply the HOTT metric and its extension to support educational media research, with a retrieval task of matching topics in German curricula to educational textbooks passages, along with offering an auxiliary explanatory document representing the dominant topic of the retrieved document. In a user study, our explanation method is preferred over regular topic keywords.</abstract>
<identifier type="citekey">wehnert-etal-2021-hotter-hierarchical</identifier>
<identifier type="doi">10.18653/v1/2021.findings-emnlp.418</identifier>
<location>
<url>https://aclanthology.org/2021.findings-emnlp.418</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>4856</start>
<end>4866</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T HOTTER: Hierarchical Optimal Topic Transport with Explanatory Context Representations
%A Wehnert, Sabine
%A Scheel, Christian
%A Szakács-Behling, Simona
%A Nieländer, Maret
%A Mielke, Patrick
%A De Luca, Ernesto William
%S Findings of the Association for Computational Linguistics: EMNLP 2021
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic
%F wehnert-etal-2021-hotter-hierarchical
%X Natural language processing (NLP) is often the backbone of today’s systems for user interactions, information retrieval and others. Many of such NLP applications rely on specialized learned representations (e.g. neural word embeddings, topic models) that improve the ability to reason about the relationships between documents of a corpus. Paired with the progress in learned representations, the similarity metrics used to compare representations of documents are also evolving, with numerous proposals differing in computation time or interpretability. In this paper we propose an extension to a specific emerging hybrid document distance metric which combines topic models and word embeddings: the Hierarchical Optimal Topic Transport (HOTT). In specific, we extend HOTT by using context-enhanced word representations. We provide a validation of our approach on public datasets, using the language model BERT for a document categorization task. Results indicate competitive performance of the extended HOTT metric. We furthermore apply the HOTT metric and its extension to support educational media research, with a retrieval task of matching topics in German curricula to educational textbooks passages, along with offering an auxiliary explanatory document representing the dominant topic of the retrieved document. In a user study, our explanation method is preferred over regular topic keywords.
%R 10.18653/v1/2021.findings-emnlp.418
%U https://aclanthology.org/2021.findings-emnlp.418
%U https://doi.org/10.18653/v1/2021.findings-emnlp.418
%P 4856-4866
Markdown (Informal)
[HOTTER: Hierarchical Optimal Topic Transport with Explanatory Context Representations](https://aclanthology.org/2021.findings-emnlp.418) (Wehnert et al., Findings 2021)
ACL