% Workshop paper record (ACL Anthology ID 2020.nlpcss-1.8); values are brace-delimited, month uses the standard macro.
@inproceedings{todd-etal-2020-unsupervised,
  author    = {Todd, Graham and Voss, Catalin and Hong, Jenny},
  title     = {Unsupervised Anomaly Detection in Parole Hearings using Language Models},
  editor    = {Bamman, David and Hovy, Dirk and Jurgens, David and O'Connor, Brendan and Volkova, Svitlana},
  booktitle = {Proceedings of the Fourth Workshop on Natural Language Processing and Computational Social Science},
  month     = nov,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  pages     = {66--71},
  doi       = {10.18653/v1/2020.nlpcss-1.8},
  url       = {https://aclanthology.org/2020.nlpcss-1.8},
  abstract  = {Each year, thousands of roughly 150-page parole hearing transcripts in California go unread because legal experts lack the time to review them. Yet, reviewing transcripts is the only means of public oversight in the parole process. To assist reviewers, we present a simple unsupervised technique for using language models (LMs) to identify procedural anomalies in long-form legal text. Our technique highlights unusual passages that suggest further review could be necessary. We utilize a contrastive perplexity score to identify passages, defined as the scaled difference between its perplexities from two LMs, one fine-tuned on the target (parole) domain, and another pre-trained on out-of-domain text to normalize for grammatical or syntactic anomalies. We present quantitative analysis of the results and note that our method has identified some important cases for review. We are also excited about potential applications in unsupervised anomaly detection, and present a brief analysis of results for detecting fake TripAdvisor reviews.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, keyed by the same ID as the citekey identifier below. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="todd-etal-2020-unsupervised">
    <!-- Paper-level description: title, then the three authors in order. -->
    <titleInfo><title>Unsupervised Anomaly Detection in Parole Hearings using Language Models</title></titleInfo>
    <name type="personal">
      <namePart type="given">Graham</namePart>
      <namePart type="family">Todd</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Catalin</namePart>
      <namePart type="family">Voss</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Jenny</namePart>
      <namePart type="family">Hong</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <originInfo>
      <dateIssued>2020-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its five editors, and publisher details. -->
    <relatedItem type="host">
      <titleInfo><title>Proceedings of the Fourth Workshop on Natural Language Processing and Computational Social Science</title></titleInfo>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Bamman</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Dirk</namePart>
        <namePart type="family">Hovy</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Brendan</namePart>
        <namePart type="family">O’Connor</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Svitlana</namePart>
        <namePart type="family">Volkova</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place><placeTerm type="text">Online</placeTerm></place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Each year, thousands of roughly 150-page parole hearing transcripts in California go unread because legal experts lack the time to review them. Yet, reviewing transcripts is the only means of public oversight in the parole process. To assist reviewers, we present a simple unsupervised technique for using language models (LMs) to identify procedural anomalies in long-form legal text. Our technique highlights unusual passages that suggest further review could be necessary. We utilize a contrastive perplexity score to identify passages, defined as the scaled difference between its perplexities from two LMs, one fine-tuned on the target (parole) domain, and another pre-trained on out-of-domain text to normalize for grammatical or syntactic anomalies. We present quantitative analysis of the results and note that our method has identified some important cases for review. We are also excited about potential applications in unsupervised anomaly detection, and present a brief analysis of results for detecting fake TripAdvisor reviews.</abstract>
    <!-- Identifiers and access location. -->
    <identifier type="citekey">todd-etal-2020-unsupervised</identifier>
    <identifier type="doi">10.18653/v1/2020.nlpcss-1.8</identifier>
    <location><url>https://aclanthology.org/2020.nlpcss-1.8</url></location>
    <!-- Position within the host volume: issue date and page extent. -->
    <part>
      <date>2020-11</date>
      <extent unit="page">
        <start>66</start>
        <end>71</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Unsupervised Anomaly Detection in Parole Hearings using Language Models
%A Todd, Graham
%A Voss, Catalin
%A Hong, Jenny
%Y Bamman, David
%Y Hovy, Dirk
%Y Jurgens, David
%Y O’Connor, Brendan
%Y Volkova, Svitlana
%S Proceedings of the Fourth Workshop on Natural Language Processing and Computational Social Science
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F todd-etal-2020-unsupervised
%X Each year, thousands of roughly 150-page parole hearing transcripts in California go unread because legal experts lack the time to review them. Yet, reviewing transcripts is the only means of public oversight in the parole process. To assist reviewers, we present a simple unsupervised technique for using language models (LMs) to identify procedural anomalies in long-form legal text. Our technique highlights unusual passages that suggest further review could be necessary. We utilize a contrastive perplexity score to identify passages, defined as the scaled difference between its perplexities from two LMs, one fine-tuned on the target (parole) domain, and another pre-trained on out-of-domain text to normalize for grammatical or syntactic anomalies. We present quantitative analysis of the results and note that our method has identified some important cases for review. We are also excited about potential applications in unsupervised anomaly detection, and present a brief analysis of results for detecting fake TripAdvisor reviews.
%R 10.18653/v1/2020.nlpcss-1.8
%U https://aclanthology.org/2020.nlpcss-1.8
%U https://doi.org/10.18653/v1/2020.nlpcss-1.8
%P 66-71
Markdown (Informal)
[Unsupervised Anomaly Detection in Parole Hearings using Language Models](https://aclanthology.org/2020.nlpcss-1.8) (Todd et al., NLP+CSS 2020)
ACL