@inproceedings{alokaili-etal-2019-ranking,
title = "Re-Ranking Words to Improve Interpretability of Automatically Generated Topics",
author = "Alokaili, Areej and
Aletras, Nikolaos and
Stevenson, Mark",
editor = "Dobnik, Simon and
Chatzikyriakidis, Stergios and
Demberg, Vera",
booktitle = "Proceedings of the 13th International Conference on Computational Semantics - Long Papers",
month = may,
year = "2019",
address = "Gothenburg, Sweden",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-0404",
doi = "10.18653/v1/W19-0404",
pages = "43--54",
abstract = "Topics models, such as LDA, are widely used in Natural Language Processing. Making their output interpretable is an important area of research with applications to areas such as the enhancement of exploratory search interfaces and the development of interpretable machine learning models. Conventionally, topics are represented by their n most probable words, however, these representations are often difficult for humans to interpret. This paper explores the re-ranking of topic words to generate more interpretable topic representations. A range of approaches are compared and evaluated in two experiments. The first uses crowdworkers to associate topics represented by different word rankings with related documents. The second experiment is an automatic approach based on a document retrieval task applied on multiple domains. Results in both experiments demonstrate that re-ranking words improves topic interpretability and that the most effective re-ranking schemes were those which combine information about the importance of words both within topics and their relative frequency in the entire corpus. In addition, close correlation between the results of the two evaluation approaches suggests that the automatic method proposed here could be used to evaluate re-ranking methods without the need for human judgements.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="alokaili-etal-2019-ranking">
<titleInfo>
<title>Re-Ranking Words to Improve Interpretability of Automatically Generated Topics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Areej</namePart>
<namePart type="family">Alokaili</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikolaos</namePart>
<namePart type="family">Aletras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Stevenson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 13th International Conference on Computational Semantics - Long Papers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Dobnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stergios</namePart>
<namePart type="family">Chatzikyriakidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Demberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gothenburg, Sweden</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Topics models, such as LDA, are widely used in Natural Language Processing. Making their output interpretable is an important area of research with applications to areas such as the enhancement of exploratory search interfaces and the development of interpretable machine learning models. Conventionally, topics are represented by their n most probable words, however, these representations are often difficult for humans to interpret. This paper explores the re-ranking of topic words to generate more interpretable topic representations. A range of approaches are compared and evaluated in two experiments. The first uses crowdworkers to associate topics represented by different word rankings with related documents. The second experiment is an automatic approach based on a document retrieval task applied on multiple domains. Results in both experiments demonstrate that re-ranking words improves topic interpretability and that the most effective re-ranking schemes were those which combine information about the importance of words both within topics and their relative frequency in the entire corpus. In addition, close correlation between the results of the two evaluation approaches suggests that the automatic method proposed here could be used to evaluate re-ranking methods without the need for human judgements.</abstract>
<identifier type="citekey">alokaili-etal-2019-ranking</identifier>
<identifier type="doi">10.18653/v1/W19-0404</identifier>
<location>
<url>https://aclanthology.org/W19-0404</url>
</location>
<part>
<date>2019-05</date>
<extent unit="page">
<start>43</start>
<end>54</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Re-Ranking Words to Improve Interpretability of Automatically Generated Topics
%A Alokaili, Areej
%A Aletras, Nikolaos
%A Stevenson, Mark
%Y Dobnik, Simon
%Y Chatzikyriakidis, Stergios
%Y Demberg, Vera
%S Proceedings of the 13th International Conference on Computational Semantics - Long Papers
%D 2019
%8 May
%I Association for Computational Linguistics
%C Gothenburg, Sweden
%F alokaili-etal-2019-ranking
%X Topics models, such as LDA, are widely used in Natural Language Processing. Making their output interpretable is an important area of research with applications to areas such as the enhancement of exploratory search interfaces and the development of interpretable machine learning models. Conventionally, topics are represented by their n most probable words, however, these representations are often difficult for humans to interpret. This paper explores the re-ranking of topic words to generate more interpretable topic representations. A range of approaches are compared and evaluated in two experiments. The first uses crowdworkers to associate topics represented by different word rankings with related documents. The second experiment is an automatic approach based on a document retrieval task applied on multiple domains. Results in both experiments demonstrate that re-ranking words improves topic interpretability and that the most effective re-ranking schemes were those which combine information about the importance of words both within topics and their relative frequency in the entire corpus. In addition, close correlation between the results of the two evaluation approaches suggests that the automatic method proposed here could be used to evaluate re-ranking methods without the need for human judgements.
%R 10.18653/v1/W19-0404
%U https://aclanthology.org/W19-0404
%U https://doi.org/10.18653/v1/W19-0404
%P 43-54
Markdown (Informal)
[Re-Ranking Words to Improve Interpretability of Automatically Generated Topics](https://aclanthology.org/W19-0404) (Alokaili et al., IWCS 2019)
ACL