@inproceedings{lawley-etal-2023-statistical,
title = "A Statistical Approach for Quantifying Group Difference in Topic Distributions Using Clinical Discourse Samples",
author = "Lawley, Grace O. and
Heeman, Peter A. and
Dolata, Jill K. and
Fombonne, Eric and
Bedrick, Steven",
editor = "Stoyanchev, Svetlana and
Joty, Shafiq and
Schlangen, David and
Dusek, Ondrej and
Kennington, Casey and
Alikhani, Malihe",
booktitle = "Proceedings of the 24th Annual Meeting of the Special Interest Group on Discourse and Dialogue",
month = sep,
year = "2023",
address = "Prague, Czechia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.sigdial-1.5/",
doi = "10.18653/v1/2023.sigdial-1.5",
pages = "55--65",
abstract = "Topic distribution matrices created by topic models are typically used for document classification or as features in a separate machine learning algorithm. Existing methods for evaluating these topic distributions include metrics such as coherence and perplexity; however, there is a lack of statistically grounded evaluation tools. We present a statistical method for investigating group differences in the document-topic distribution vectors created by Latent Dirichlet Allocation (LDA) that uses Aitchison geometry to transform the vectors, multivariate analysis of variance (MANOVA) to compare sample means, and partial eta squared to calculate effect size. Using a corpus of dialogues between Autistic and Typically Developing (TD) children and trained examiners, we found that the topic distributions of Autistic children differed from those of TD children when responding to questions about social difficulties (p = .0083, partial eta squared = .19). Furthermore, the examiners' topic distributions differed between the Autistic and TD groups when discussing emotions (p = .0035, partial eta squared = .20), social difficulties (p {\ensuremath{<}} .001, partial eta squared = .30), and friends (p = .0224, partial eta squared = .17). These results support the use of topic modeling in studying clinically relevant features of social communication such as topic maintenance."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lawley-etal-2023-statistical">
<titleInfo>
<title>A Statistical Approach for Quantifying Group Difference in Topic Distributions Using Clinical Discourse Samples</title>
</titleInfo>
<name type="personal">
<namePart type="given">Grace</namePart>
<namePart type="given">O</namePart>
<namePart type="family">Lawley</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Heeman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jill</namePart>
<namePart type="given">K</namePart>
<namePart type="family">Dolata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eric</namePart>
<namePart type="family">Fombonne</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bedrick</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 24th Annual Meeting of the Special Interest Group on Discourse and Dialogue</title>
</titleInfo>
<name type="personal">
<namePart type="given">Svetlana</namePart>
<namePart type="family">Stoyanchev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shafiq</namePart>
<namePart type="family">Joty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Schlangen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ondrej</namePart>
<namePart type="family">Dusek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Casey</namePart>
<namePart type="family">Kennington</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Malihe</namePart>
<namePart type="family">Alikhani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Prague, Czechia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Topic distribution matrices created by topic models are typically used for document classification or as features in a separate machine learning algorithm. Existing methods for evaluating these topic distributions include metrics such as coherence and perplexity; however, there is a lack of statistically grounded evaluation tools. We present a statistical method for investigating group differences in the document-topic distribution vectors created by Latent Dirichlet Allocation (LDA) that uses Aitchison geometry to transform the vectors, multivariate analysis of variance (MANOVA) to compare sample means, and partial eta squared to calculate effect size. Using a corpus of dialogues between Autistic and Typically Developing (TD) children and trained examiners, we found that the topic distributions of Autistic children differed from those of TD children when responding to questions about social difficulties (p = .0083, partial eta squared = .19). Furthermore, the examiners’ topic distributions differed between the Autistic and TD groups when discussing emotions (p = .0035, partial eta squared = .20), social difficulties (p \ensuremath< .001, partial eta squared = .30), and friends (p = .0224, partial eta squared = .17). These results support the use of topic modeling in studying clinically relevant features of social communication such as topic maintenance.</abstract>
<identifier type="citekey">lawley-etal-2023-statistical</identifier>
<identifier type="doi">10.18653/v1/2023.sigdial-1.5</identifier>
<location>
<url>https://aclanthology.org/2023.sigdial-1.5/</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>55</start>
<end>65</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Statistical Approach for Quantifying Group Difference in Topic Distributions Using Clinical Discourse Samples
%A Lawley, Grace O.
%A Heeman, Peter A.
%A Dolata, Jill K.
%A Fombonne, Eric
%A Bedrick, Steven
%Y Stoyanchev, Svetlana
%Y Joty, Shafiq
%Y Schlangen, David
%Y Dusek, Ondrej
%Y Kennington, Casey
%Y Alikhani, Malihe
%S Proceedings of the 24th Annual Meeting of the Special Interest Group on Discourse and Dialogue
%D 2023
%8 September
%I Association for Computational Linguistics
%C Prague, Czechia
%F lawley-etal-2023-statistical
%X Topic distribution matrices created by topic models are typically used for document classification or as features in a separate machine learning algorithm. Existing methods for evaluating these topic distributions include metrics such as coherence and perplexity; however, there is a lack of statistically grounded evaluation tools. We present a statistical method for investigating group differences in the document-topic distribution vectors created by Latent Dirichlet Allocation (LDA) that uses Aitchison geometry to transform the vectors, multivariate analysis of variance (MANOVA) to compare sample means, and partial eta squared to calculate effect size. Using a corpus of dialogues between Autistic and Typically Developing (TD) children and trained examiners, we found that the topic distributions of Autistic children differed from those of TD children when responding to questions about social difficulties (p = .0083, partial eta squared = .19). Furthermore, the examiners’ topic distributions differed between the Autistic and TD groups when discussing emotions (p = .0035, partial eta squared = .20), social difficulties (p \ensuremath< .001, partial eta squared = .30), and friends (p = .0224, partial eta squared = .17). These results support the use of topic modeling in studying clinically relevant features of social communication such as topic maintenance.
%R 10.18653/v1/2023.sigdial-1.5
%U https://aclanthology.org/2023.sigdial-1.5/
%U https://doi.org/10.18653/v1/2023.sigdial-1.5
%P 55-65
Markdown (Informal)
[A Statistical Approach for Quantifying Group Difference in Topic Distributions Using Clinical Discourse Samples](https://aclanthology.org/2023.sigdial-1.5/) (Lawley et al., SIGDIAL 2023)
ACL