@inproceedings{wolohan-etal-2018-detecting,
title = "Detecting Linguistic Traces of Depression in Topic-Restricted Text: Attending to Self-Stigmatized Depression with {NLP}",
author = "Wolohan, JT and
Hiraga, Misato and
Mukherjee, Atreyee and
Sayyed, Zeeshan Ali and
Millard, Matthew",
editor = "Sinha, Manjira and
Dasgupta, Tirthankar",
booktitle = "Proceedings of the First International Workshop on Language Cognition and Computational Models",
month = aug,
year = "2018",
address = "Santa Fe, New Mexico, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-4102",
pages = "11--21",
abstract = "Natural language processing researchers have proven the ability of machine learning approaches to detect depression-related cues from language; however, to date, these efforts have primarily assumed it was acceptable to leave depression-related texts in the data. Our concerns with this are twofold: first, that the models may be overfitting on depression-related signals, which may not be present in all depressed users (only those who talk about depression on social media); and second, that these models would under-perform for users who are sensitive to the public stigma of depression. This study demonstrates the validity to those concerns. We construct a novel corpus of texts from 12,106 Reddit users and perform lexical and predictive analyses under two conditions: one where all text produced by the users is included and one where the depression data is withheld. We find significant differences in the language used by depressed users under the two conditions as well as a difference in the ability of machine learning algorithms to correctly detect depression. However, despite the lexical differences and reduced classification performance{--}each of which suggests that users may be able to fool algorithms by avoiding direct discussion of depression{--}a still respectable overall performance suggests lexical models are reasonably robust and well suited for a role in a diagnostic or monitoring capacity.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wolohan-etal-2018-detecting">
<titleInfo>
<title>Detecting Linguistic Traces of Depression in Topic-Restricted Text: Attending to Self-Stigmatized Depression with NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">J</namePart>
<namePart type="given">T</namePart>
<namePart type="family">Wolohan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Misato</namePart>
<namePart type="family">Hiraga</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Atreyee</namePart>
<namePart type="family">Mukherjee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zeeshan</namePart>
<namePart type="given">Ali</namePart>
<namePart type="family">Sayyed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthew</namePart>
<namePart type="family">Millard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First International Workshop on Language Cognition and Computational Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Manjira</namePart>
<namePart type="family">Sinha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tirthankar</namePart>
<namePart type="family">Dasgupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Santa Fe, New Mexico, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Natural language processing researchers have proven the ability of machine learning approaches to detect depression-related cues from language; however, to date, these efforts have primarily assumed it was acceptable to leave depression-related texts in the data. Our concerns with this are twofold: first, that the models may be overfitting on depression-related signals, which may not be present in all depressed users (only those who talk about depression on social media); and second, that these models would under-perform for users who are sensitive to the public stigma of depression. This study demonstrates the validity to those concerns. We construct a novel corpus of texts from 12,106 Reddit users and perform lexical and predictive analyses under two conditions: one where all text produced by the users is included and one where the depression data is withheld. We find significant differences in the language used by depressed users under the two conditions as well as a difference in the ability of machine learning algorithms to correctly detect depression. However, despite the lexical differences and reduced classification performance–each of which suggests that users may be able to fool algorithms by avoiding direct discussion of depression–a still respectable overall performance suggests lexical models are reasonably robust and well suited for a role in a diagnostic or monitoring capacity.</abstract>
<identifier type="citekey">wolohan-etal-2018-detecting</identifier>
<location>
<url>https://aclanthology.org/W18-4102</url>
</location>
<part>
<date>2018-08</date>
<extent unit="page">
<start>11</start>
<end>21</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Detecting Linguistic Traces of Depression in Topic-Restricted Text: Attending to Self-Stigmatized Depression with NLP
%A Wolohan, J. T.
%A Hiraga, Misato
%A Mukherjee, Atreyee
%A Sayyed, Zeeshan Ali
%A Millard, Matthew
%Y Sinha, Manjira
%Y Dasgupta, Tirthankar
%S Proceedings of the First International Workshop on Language Cognition and Computational Models
%D 2018
%8 August
%I Association for Computational Linguistics
%C Santa Fe, New Mexico, USA
%F wolohan-etal-2018-detecting
%X Natural language processing researchers have proven the ability of machine learning approaches to detect depression-related cues from language; however, to date, these efforts have primarily assumed it was acceptable to leave depression-related texts in the data. Our concerns with this are twofold: first, that the models may be overfitting on depression-related signals, which may not be present in all depressed users (only those who talk about depression on social media); and second, that these models would under-perform for users who are sensitive to the public stigma of depression. This study demonstrates the validity to those concerns. We construct a novel corpus of texts from 12,106 Reddit users and perform lexical and predictive analyses under two conditions: one where all text produced by the users is included and one where the depression data is withheld. We find significant differences in the language used by depressed users under the two conditions as well as a difference in the ability of machine learning algorithms to correctly detect depression. However, despite the lexical differences and reduced classification performance–each of which suggests that users may be able to fool algorithms by avoiding direct discussion of depression–a still respectable overall performance suggests lexical models are reasonably robust and well suited for a role in a diagnostic or monitoring capacity.
%U https://aclanthology.org/W18-4102
%P 11-21
Markdown (Informal)
[Detecting Linguistic Traces of Depression in Topic-Restricted Text: Attending to Self-Stigmatized Depression with NLP](https://aclanthology.org/W18-4102) (Wolohan et al., LCCM 2018)
ACL