% BibTeX record for the PROPOR 2026 (Vol. 1) paper published at
% https://aclanthology.org/2026.propor-1.5/ (pages 41 to 56).
% Accented letters use BibTeX "special character" groups such as {\~a}, {\c{c}}.
@inproceedings{guimaraes-etal-2026-experimental,
  title     = {Experimental Evaluation of Topic Modeling Methods for Categorizing Irregularities in Health-related news},
  author    = {Guimar{\~a}es, Alysson and
               Junior, Methanias Cola{\c{c}}o and
               Almeida, Samuel and
               Fontes, Raphael},
  editor    = {Souza, Marlo and
               de-Dios-Flores, Iria and
               Santos, Diana and
               Freitas, Larissa and
               Souza, Jackson Wilke da Cruz and
               Ribeiro, Eug{\'e}nio},
  booktitle = {Proceedings of the 17th International Conference on Computational Processing of {Portuguese} ({PROPOR} 2026) - Vol. 1},
  month     = apr,
  year      = {2026},
  address   = {Salvador, Brazil},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.propor-1.5/},
  pages     = {41--56},
  isbn      = {979-8-89176-387-6},
  abstract  = {Context: The increasing availability of textual data has driven the application of Natural Language Processing (NLP) techniques in public administration to improve public services. Objective: This study aims to analyze topic modeling methods in the context of public health audits conducted by the National Department of SUS Auditing (AudSUS). Methods: A controlled in vitro experiment was conducted to assess the performance of the methods in topic modeling tasks using coherence metrics. Results: The LSA method stood out among models with the highest average C{\_}V and C{\_}NPMI coherence. LSA-based models achieved superior performance compared to 215 other models in configurations with lower top-n and top-k values. Overall, the statistical analysis confirms that the observed differences among the models are not due to random variation. Conclusion: The results underscore the potential of topic modeling methods for clustering news articles that exhibit indications of irregularities, thereby guiding information retrieval during the analytical phase of the audit process. This approach enhances the overall effectiveness of audits and facilitates faster preparation of teams for the operational stage.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding one record: guimaraes-etal-2026-experimental. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="guimaraes-etal-2026-experimental">

    <!-- Title of the paper. -->
    <titleInfo>
      <title>Experimental Evaluation of Topic Modeling Methods for Categorizing Irregularities in Health-related news</title>
    </titleInfo>

    <!-- Authors, in the order they are listed; multi-word given names are split into one namePart per word. -->
    <name type="personal">
      <namePart type="given">Alysson</namePart>
      <namePart type="family">Guimarães</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Methanias</namePart>
      <namePart type="given">Colaço</namePart>
      <namePart type="family">Junior</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Samuel</namePart>
      <namePart type="family">Almeida</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Raphael</namePart>
      <namePart type="family">Fontes</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <!-- Issue date of the paper (year and month). -->
    <originInfo>
      <dateIssued>2026-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Marlo</namePart>
        <namePart type="family">Souza</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Iria</namePart>
        <namePart type="family">de-Dios-Flores</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Diana</namePart>
        <namePart type="family">Santos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Larissa</namePart>
        <namePart type="family">Freitas</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jackson</namePart>
        <namePart type="given">Wilke</namePart>
        <namePart type="given">da</namePart>
        <namePart type="given">Cruz</namePart>
        <namePart type="family">Souza</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Eugénio</namePart>
        <namePart type="family">Ribeiro</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Salvador, Brazil</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-387-6</identifier>
    </relatedItem>

    <!-- Abstract is kept on a single line so no extra whitespace enters its text. -->
    <abstract>Context: The increasing availability of textual data has driven the application of Natural Language Processing (NLP) techniques in public administration to improve public services. Objective: This study aims to analyze topic modeling methods in the context of public health audits conducted by the National Department of SUS Auditing (AudSUS). Methods: A controlled in vitro experiment was conducted to assess the performance of the methods in topic modeling tasks using coherence metrics. Results: The LSA method stood out among models with the highest average C_V and C_NPMI coherence. LSA-based models achieved superior performance compared to 215 other models in configurations with lower top-n and top-k values. Overall, the statistical analysis confirms that the observed differences among the models are not due to random variation. Conclusion: The results underscore the potential of topic modeling methods for clustering news articles that exhibit indications of irregularities, thereby guiding information retrieval during the analytical phase of the audit process. This approach enhances the overall effectiveness of audits and facilitates faster preparation of teams for the operational stage.</abstract>

    <!-- Citation key and landing-page URL for this record. -->
    <identifier type="citekey">guimaraes-etal-2026-experimental</identifier>
    <location>
      <url>https://aclanthology.org/2026.propor-1.5/</url>
    </location>

    <!-- Date and first/last page of the paper. -->
    <part>
      <date>2026-04</date>
      <extent unit="page">
        <start>41</start>
        <end>56</end>
      </extent>
    </part>

  </mods>
</modsCollection>
%0 Conference Proceedings
%T Experimental Evaluation of Topic Modeling Methods for Categorizing Irregularities in Health-related news
%A Guimarães, Alysson
%A Junior, Methanias Colaço
%A Almeida, Samuel
%A Fontes, Raphael
%Y Souza, Marlo
%Y de-Dios-Flores, Iria
%Y Santos, Diana
%Y Freitas, Larissa
%Y Souza, Jackson Wilke da Cruz
%Y Ribeiro, Eugénio
%S Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1
%D 2026
%8 April
%I Association for Computational Linguistics
%C Salvador, Brazil
%@ 979-8-89176-387-6
%F guimaraes-etal-2026-experimental
%X Context: The increasing availability of textual data has driven the application of Natural Language Processing (NLP) techniques in public administration to improve public services. Objective: This study aims to analyze topic modeling methods in the context of public health audits conducted by the National Department of SUS Auditing (AudSUS). Methods: A controlled in vitro experiment was conducted to assess the performance of the methods in topic modeling tasks using coherence metrics. Results: The LSA method stood out among models with the highest average C_V and C_NPMI coherence. LSA-based models achieved superior performance compared to 215 other models in configurations with lower top-n and top-k values. Overall, the statistical analysis confirms that the observed differences among the models are not due to random variation. Conclusion: The results underscore the potential of topic modeling methods for clustering news articles that exhibit indications of irregularities, thereby guiding information retrieval during the analytical phase of the audit process. This approach enhances the overall effectiveness of audits and facilitates faster preparation of teams for the operational stage.
%U https://aclanthology.org/2026.propor-1.5/
%P 41-56
Markdown (Informal)
[Experimental Evaluation of Topic Modeling Methods for Categorizing Irregularities in Health-related news](https://aclanthology.org/2026.propor-1.5/) (Guimarães et al., PROPOR 2026)
ACL