@inproceedings{kumar-le-2024-foto,
title = "{F}o{T}o: Targeted Visual Topic Modeling for Focused Analysis of Short Texts",
author = "Kumar, Sanuj and
Le, Tuan",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.653",
pages = "7406--7416",
abstract = "Given a corpus of documents, focused analysis aims to find topics relevant to aspects that a user is interested in. The aspects are often expressed by a set of keywords provided by the user. Short texts such as microblogs and tweets pose several challenges to this task because the sparsity of word co-occurrences may hinder the extraction of meaningful and relevant topics. Moreover, most of the existing topic models perform a full corpus analysis that treats all topics equally, which may make the learned topics not be on target. In this paper, we propose a novel targeted topic model for semantic short-text embedding which aims to learn all topics and low-dimensional visual representations of documents, while preserving relevant topics for focused analysis of short texts. To preserve the relevant topics in the visualization space, we propose jointly modeling topics and the pairwise document ranking based on document-keyword distances in the visualization space. The extensive experiments on several real-world datasets demonstrate the effectiveness of our proposed model in terms of targeted topic modeling and visualization.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kumar-le-2024-foto">
<titleInfo>
<title>FoTo: Targeted Visual Topic Modeling for Focused Analysis of Short Texts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sanuj</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tuan</namePart>
<namePart type="family">Le</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Given a corpus of documents, focused analysis aims to find topics relevant to aspects that a user is interested in. The aspects are often expressed by a set of keywords provided by the user. Short texts such as microblogs and tweets pose several challenges to this task because the sparsity of word co-occurrences may hinder the extraction of meaningful and relevant topics. Moreover, most of the existing topic models perform a full corpus analysis that treats all topics equally, which may make the learned topics not be on target. In this paper, we propose a novel targeted topic model for semantic short-text embedding which aims to learn all topics and low-dimensional visual representations of documents, while preserving relevant topics for focused analysis of short texts. To preserve the relevant topics in the visualization space, we propose jointly modeling topics and the pairwise document ranking based on document-keyword distances in the visualization space. The extensive experiments on several real-world datasets demonstrate the effectiveness of our proposed model in terms of targeted topic modeling and visualization.</abstract>
<identifier type="citekey">kumar-le-2024-foto</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.653</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>7406</start>
<end>7416</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T FoTo: Targeted Visual Topic Modeling for Focused Analysis of Short Texts
%A Kumar, Sanuj
%A Le, Tuan
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F kumar-le-2024-foto
%X Given a corpus of documents, focused analysis aims to find topics relevant to aspects that a user is interested in. The aspects are often expressed by a set of keywords provided by the user. Short texts such as microblogs and tweets pose several challenges to this task because the sparsity of word co-occurrences may hinder the extraction of meaningful and relevant topics. Moreover, most of the existing topic models perform a full corpus analysis that treats all topics equally, which may make the learned topics not be on target. In this paper, we propose a novel targeted topic model for semantic short-text embedding which aims to learn all topics and low-dimensional visual representations of documents, while preserving relevant topics for focused analysis of short texts. To preserve the relevant topics in the visualization space, we propose jointly modeling topics and the pairwise document ranking based on document-keyword distances in the visualization space. The extensive experiments on several real-world datasets demonstrate the effectiveness of our proposed model in terms of targeted topic modeling and visualization.
%U https://aclanthology.org/2024.lrec-main.653
%P 7406-7416
Markdown (Informal)
[FoTo: Targeted Visual Topic Modeling for Focused Analysis of Short Texts](https://aclanthology.org/2024.lrec-main.653) (Kumar & Le, LREC-COLING 2024)
ACL