@inproceedings{govind-sohoney-2022-learning,
title = "Learning Geolocations for Cold-Start and Hard-to-Resolve Addresses via Deep Metric Learning",
author = "{Govind} and
Sohoney, Saurabh",
editor = "Li, Yunyao and
Lazaridou, Angeliki",
booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: Industry Track",
month = dec,
year = "2022",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.emnlp-industry.33",
doi = "10.18653/v1/2022.emnlp-industry.33",
pages = "322--331",
abstract = "With evergrowing digital adoption in the society and increasing demand for businesses to deliver to customers doorstep, the last mile hop of transportation planning poses unique challenges in emerging geographies with unstructured addresses. One of the crucial inputs to facilitate effective planning is the task of geolocating customer addresses. Existing systems operate by aggregating historical delivery locations or by resolving/matching addresses to known buildings and campuses to vend a high-precision geolocation. However, by design they fail to cater to a significant fraction of addresses which are new in the system and have inaccurate or missing building level information. We propose a framework to resolve these addresses (referred to as hard-to-resolve henceforth) to a shallower granularity termed as neighbourhood. Specifically, we propose a weakly supervised deep metric learning model to encode the geospatial semantics in address embeddings. We present empirical evaluation on India (IN) and the United Arab Emirates (UAE) hard-to-resolve addresses to show significant improvements in learning geolocations i.e., 22{\%} (IN) {\&} 55{\%} (UAE) reduction in delivery defects (where learnt geocode is Y meters away from actual location), and 43{\%} (IN) {\&} 90{\%} (UAE) reduction in 50th percentile (p50) distance between learnt and actual delivery locations over the existing production system.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="govind-sohoney-2022-learning">
<titleInfo>
<title>Learning Geolocations for Cold-Start and Hard-to-Resolve Addresses via Deep Metric Learning</title>
</titleInfo>
<name>
<namePart>Govind</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saurabh</namePart>
<namePart type="family">Sohoney</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yunyao</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Angeliki</namePart>
<namePart type="family">Lazaridou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>With evergrowing digital adoption in the society and increasing demand for businesses to deliver to customers doorstep, the last mile hop of transportation planning poses unique challenges in emerging geographies with unstructured addresses. One of the crucial inputs to facilitate effective planning is the task of geolocating customer addresses. Existing systems operate by aggregating historical delivery locations or by resolving/matching addresses to known buildings and campuses to vend a high-precision geolocation. However, by design they fail to cater to a significant fraction of addresses which are new in the system and have inaccurate or missing building level information. We propose a framework to resolve these addresses (referred to as hard-to-resolve henceforth) to a shallower granularity termed as neighbourhood. Specifically, we propose a weakly supervised deep metric learning model to encode the geospatial semantics in address embeddings. We present empirical evaluation on India (IN) and the United Arab Emirates (UAE) hard-to-resolve addresses to show significant improvements in learning geolocations i.e., 22% (IN) & 55% (UAE) reduction in delivery defects (where learnt geocode is Y meters away from actual location), and 43% (IN) & 90% (UAE) reduction in 50th percentile (p50) distance between learnt and actual delivery locations over the existing production system.</abstract>
<identifier type="citekey">govind-sohoney-2022-learning</identifier>
<identifier type="doi">10.18653/v1/2022.emnlp-industry.33</identifier>
<location>
<url>https://aclanthology.org/2022.emnlp-industry.33</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>322</start>
<end>331</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Learning Geolocations for Cold-Start and Hard-to-Resolve Addresses via Deep Metric Learning
%A Sohoney, Saurabh
%Y Li, Yunyao
%Y Lazaridou, Angeliki
%A Govind
%S Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: Industry Track
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F govind-sohoney-2022-learning
%X With evergrowing digital adoption in the society and increasing demand for businesses to deliver to customers doorstep, the last mile hop of transportation planning poses unique challenges in emerging geographies with unstructured addresses. One of the crucial inputs to facilitate effective planning is the task of geolocating customer addresses. Existing systems operate by aggregating historical delivery locations or by resolving/matching addresses to known buildings and campuses to vend a high-precision geolocation. However, by design they fail to cater to a significant fraction of addresses which are new in the system and have inaccurate or missing building level information. We propose a framework to resolve these addresses (referred to as hard-to-resolve henceforth) to a shallower granularity termed as neighbourhood. Specifically, we propose a weakly supervised deep metric learning model to encode the geospatial semantics in address embeddings. We present empirical evaluation on India (IN) and the United Arab Emirates (UAE) hard-to-resolve addresses to show significant improvements in learning geolocations i.e., 22% (IN) & 55% (UAE) reduction in delivery defects (where learnt geocode is Y meters away from actual location), and 43% (IN) & 90% (UAE) reduction in 50th percentile (p50) distance between learnt and actual delivery locations over the existing production system.
%R 10.18653/v1/2022.emnlp-industry.33
%U https://aclanthology.org/2022.emnlp-industry.33
%U https://doi.org/10.18653/v1/2022.emnlp-industry.33
%P 322-331
Markdown (Informal)
[Learning Geolocations for Cold-Start and Hard-to-Resolve Addresses via Deep Metric Learning](https://aclanthology.org/2022.emnlp-industry.33) (Govind & Sohoney, EMNLP 2022)
ACL