@inproceedings{nagpal-etal-2017-entity,
title = "An Entity Resolution Approach to Isolate Instances of Human Trafficking Online",
author = "Nagpal, Chirag and
Miller, Kyle and
Boecking, Benedikt and
Dubrawski, Artur",
editor = "Derczynski, Leon and
Xu, Wei and
Ritter, Alan and
Baldwin, Tim",
booktitle = "Proceedings of the 3rd Workshop on Noisy User-generated Text",
month = sep,
year = "2017",
address = "Copenhagen, Denmark",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-4411",
doi = "10.18653/v1/W17-4411",
pages = "77--84",
abstract = "Human trafficking is a challenging law enforcement problem, and traces of victims of such activity manifest as {`}escort advertisements{'} on various online forums. Given the large, heterogeneous and noisy structure of this data, building models to predict instances of trafficking is a convoluted task. In this paper we propose an entity resolution pipeline using a notion of proxy labels, in order to extract clusters from this data with prior history of human trafficking activity. We apply this pipeline to 5M records from backpage.com and report on the performance of this approach, challenges in terms of scalability, and some significant domain specific characteristics of our resolved entities.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nagpal-etal-2017-entity">
<titleInfo>
<title>An Entity Resolution Approach to Isolate Instances of Human Trafficking Online</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chirag</namePart>
<namePart type="family">Nagpal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kyle</namePart>
<namePart type="family">Miller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benedikt</namePart>
<namePart type="family">Boecking</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Artur</namePart>
<namePart type="family">Dubrawski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 3rd Workshop on Noisy User-generated Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">Leon</namePart>
<namePart type="family">Derczynski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Ritter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tim</namePart>
<namePart type="family">Baldwin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Copenhagen, Denmark</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Human trafficking is a challenging law enforcement problem, and traces of victims of such activity manifest as ‘escort advertisements’ on various online forums. Given the large, heterogeneous and noisy structure of this data, building models to predict instances of trafficking is a convoluted task. In this paper we propose an entity resolution pipeline using a notion of proxy labels, in order to extract clusters from this data with prior history of human trafficking activity. We apply this pipeline to 5M records from backpage.com and report on the performance of this approach, challenges in terms of scalability, and some significant domain specific characteristics of our resolved entities.</abstract>
<identifier type="citekey">nagpal-etal-2017-entity</identifier>
<identifier type="doi">10.18653/v1/W17-4411</identifier>
<location>
<url>https://aclanthology.org/W17-4411</url>
</location>
<part>
<date>2017-09</date>
<extent unit="page">
<start>77</start>
<end>84</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T An Entity Resolution Approach to Isolate Instances of Human Trafficking Online
%A Nagpal, Chirag
%A Miller, Kyle
%A Boecking, Benedikt
%A Dubrawski, Artur
%Y Derczynski, Leon
%Y Xu, Wei
%Y Ritter, Alan
%Y Baldwin, Tim
%S Proceedings of the 3rd Workshop on Noisy User-generated Text
%D 2017
%8 September
%I Association for Computational Linguistics
%C Copenhagen, Denmark
%F nagpal-etal-2017-entity
%X Human trafficking is a challenging law enforcement problem, and traces of victims of such activity manifest as ‘escort advertisements’ on various online forums. Given the large, heterogeneous and noisy structure of this data, building models to predict instances of trafficking is a convoluted task. In this paper we propose an entity resolution pipeline using a notion of proxy labels, in order to extract clusters from this data with prior history of human trafficking activity. We apply this pipeline to 5M records from backpage.com and report on the performance of this approach, challenges in terms of scalability, and some significant domain specific characteristics of our resolved entities.
%R 10.18653/v1/W17-4411
%U https://aclanthology.org/W17-4411
%U https://doi.org/10.18653/v1/W17-4411
%P 77-84
Markdown (Informal)
[An Entity Resolution Approach to Isolate Instances of Human Trafficking Online](https://aclanthology.org/W17-4411) (Nagpal et al., WNUT 2017)
ACL