@inproceedings{singh-etal-2025-geo,
title = "Geo-Spatially Informed Models for Geocoding Unstructured Addresses",
author = "Singh, Uddeshya and
Ravi Shankar, Devanapalli and
Bellala, Gowtham and
Goel, Vikas",
editor = "Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Di Eugenio, Barbara and
Schockaert, Steven and
Darwish, Kareem and
Agarwal, Apoorv",
booktitle = "Proceedings of the 31st International Conference on Computational Linguistics: Industry Track",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.coling-industry.19/",
pages = "236--242",
abstract = "Geocoding customer addresses and determining precise locations is a crucial component for any e-commerce company. Shipment delivery costs make up a significant portion of overall expenses, and having exact customer locations not only improves operational efficiency but also reduces costs and enhances the customer experience. While state-of-the-art geocoding systems are well-suited for developed countries with structured city layouts and high-quality reference corpora, they are less effective in developing countries like India, where addresses are highly unstructured and reliable reference data is scarce. Recent research has focused on creating geocoding systems tailored for developing nations such as India. In this work, we propose a method to geocode addresses in such environments. We explored various approaches to incorporate geo-spatial relationships using an LLM backbone, which provided insights into how the model learns these relationships both explicitly and implicitly. Our proposed approach outperforms the current state-of-the-art system by 20{\%} in drift accuracy within 100 meters, and the state-of-the-art commercial system by 54{\%}. This has a potential to reduce the incorrect delivery hub assignments by 8{\%} which leads to significant customer experience improvements and business savings."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="singh-etal-2025-geo">
<titleInfo>
<title>Geo-Spatially Informed Models for Geocoding Unstructured Addresses</title>
</titleInfo>
<name type="personal">
<namePart type="given">Uddeshya</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Devanapalli</namePart>
<namePart type="family">Ravi Shankar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gowtham</namePart>
<namePart type="family">Bellala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vikas</namePart>
<namePart type="family">Goel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 31st International Conference on Computational Linguistics: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Apidianaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hend</namePart>
<namePart type="family">Al-Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="family">Di Eugenio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Schockaert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kareem</namePart>
<namePart type="family">Darwish</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Apoorv</namePart>
<namePart type="family">Agarwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Geocoding customer addresses and determining precise locations is a crucial component for any e-commerce company. Shipment delivery costs make up a significant portion of overall expenses, and having exact customer locations not only improves operational efficiency but also reduces costs and enhances the customer experience. While state-of-the-art geocoding systems are well-suited for developed countries with structured city layouts and high-quality reference corpora, they are less effective in developing countries like India, where addresses are highly unstructured and reliable reference data is scarce. Recent research has focused on creating geocoding systems tailored for developing nations such as India. In this work, we propose a method to geocode addresses in such environments. We explored various approaches to incorporate geo-spatial relationships using an LLM backbone, which provided insights into how the model learns these relationships both explicitly and implicitly. Our proposed approach outperforms the current state-of-the-art system by 20% in drift accuracy within 100 meters, and the state-of-the-art commercial system by 54%. This has a potential to reduce the incorrect delivery hub assignments by 8% which leads to significant customer experience improvements and business savings.</abstract>
<identifier type="citekey">singh-etal-2025-geo</identifier>
<location>
<url>https://aclanthology.org/2025.coling-industry.19/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>236</start>
<end>242</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Geo-Spatially Informed Models for Geocoding Unstructured Addresses
%A Singh, Uddeshya
%A Ravi Shankar, Devanapalli
%A Bellala, Gowtham
%A Goel, Vikas
%Y Rambow, Owen
%Y Wanner, Leo
%Y Apidianaki, Marianna
%Y Al-Khalifa, Hend
%Y Di Eugenio, Barbara
%Y Schockaert, Steven
%Y Darwish, Kareem
%Y Agarwal, Apoorv
%S Proceedings of the 31st International Conference on Computational Linguistics: Industry Track
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F singh-etal-2025-geo
%X Geocoding customer addresses and determining precise locations is a crucial component for any e-commerce company. Shipment delivery costs make up a significant portion of overall expenses, and having exact customer locations not only improves operational efficiency but also reduces costs and enhances the customer experience. While state-of-the-art geocoding systems are well-suited for developed countries with structured city layouts and high-quality reference corpora, they are less effective in developing countries like India, where addresses are highly unstructured and reliable reference data is scarce. Recent research has focused on creating geocoding systems tailored for developing nations such as India. In this work, we propose a method to geocode addresses in such environments. We explored various approaches to incorporate geo-spatial relationships using an LLM backbone, which provided insights into how the model learns these relationships both explicitly and implicitly. Our proposed approach outperforms the current state-of-the-art system by 20% in drift accuracy within 100 meters, and the state-of-the-art commercial system by 54%. This has a potential to reduce the incorrect delivery hub assignments by 8% which leads to significant customer experience improvements and business savings.
%U https://aclanthology.org/2025.coling-industry.19/
%P 236-242
Markdown (Informal)
[Geo-Spatially Informed Models for Geocoding Unstructured Addresses](https://aclanthology.org/2025.coling-industry.19/) (Singh et al., COLING 2025)
ACL