@inproceedings{beauchemin-2023-deepparse,
title = "Deepparse : An Extendable, and Fine-Tunable State-Of-The-Art Library for Parsing Multinational Street Addresses",
author = "Beauchemin, David and
Yassine, Marouane",
editor = "Tan, Liling and
Milajevs, Dmitrijs and
Chauhan, Geeticka and
Gwinnup, Jeremy and
Rippeth, Elijah",
booktitle = "Proceedings of the 3rd Workshop for Natural Language Processing Open Source Software (NLP-OSS 2023)",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.nlposs-1.3",
doi = "10.18653/v1/2023.nlposs-1.3",
pages = "19--24",
abstract = "Segmenting an address into meaningful components, also known as address parsing, is an essential step in many applications from record linkage to geocoding and package delivery. Consequently, a lot of work has been dedicated to develop accurate address parsing techniques, with machine learning and neural network methods leading the state-of-the-art scoreboard. However, most of the work on address parsing has been confined to academic endeavours with little availability of free and easy-to-use open-source solutions.This paper presents Deepparse, a Python open-source, extendable, fine-tunable address parsing solution under LGPL-3.0 licence to parse multinational addresses using state-of-the-art deep learning algorithms and evaluated on over 60 countries. It can parse addresses written in any language and use any address standard. The pre-trained model achieves average 99{\%} parsing accuracies on the countries used for training with no pre-processing nor post-processing needed. Moreover, the library supports fine-tuning with new data to generate a custom address parser.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="beauchemin-2023-deepparse">
<titleInfo>
<title>Deepparse : An Extendable, and Fine-Tunable State-Of-The-Art Library for Parsing Multinational Street Addresses</title>
</titleInfo>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Beauchemin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marouane</namePart>
<namePart type="family">Yassine</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 3rd Workshop for Natural Language Processing Open Source Software (NLP-OSS 2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Liling</namePart>
<namePart type="family">Tan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dmitrijs</namePart>
<namePart type="family">Milajevs</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Geeticka</namePart>
<namePart type="family">Chauhan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jeremy</namePart>
<namePart type="family">Gwinnup</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elijah</namePart>
<namePart type="family">Rippeth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Segmenting an address into meaningful components, also known as address parsing, is an essential step in many applications from record linkage to geocoding and package delivery. Consequently, a lot of work has been dedicated to develop accurate address parsing techniques, with machine learning and neural network methods leading the state-of-the-art scoreboard. However, most of the work on address parsing has been confined to academic endeavours with little availability of free and easy-to-use open-source solutions.This paper presents Deepparse, a Python open-source, extendable, fine-tunable address parsing solution under LGPL-3.0 licence to parse multinational addresses using state-of-the-art deep learning algorithms and evaluated on over 60 countries. It can parse addresses written in any language and use any address standard. The pre-trained model achieves average 99% parsing accuracies on the countries used for training with no pre-processing nor post-processing needed. Moreover, the library supports fine-tuning with new data to generate a custom address parser.</abstract>
<identifier type="citekey">beauchemin-2023-deepparse</identifier>
<identifier type="doi">10.18653/v1/2023.nlposs-1.3</identifier>
<location>
<url>https://aclanthology.org/2023.nlposs-1.3</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>19</start>
<end>24</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Deepparse : An Extendable, and Fine-Tunable State-Of-The-Art Library for Parsing Multinational Street Addresses
%A Beauchemin, David
%A Yassine, Marouane
%Y Tan, Liling
%Y Milajevs, Dmitrijs
%Y Chauhan, Geeticka
%Y Gwinnup, Jeremy
%Y Rippeth, Elijah
%S Proceedings of the 3rd Workshop for Natural Language Processing Open Source Software (NLP-OSS 2023)
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F beauchemin-2023-deepparse
%X Segmenting an address into meaningful components, also known as address parsing, is an essential step in many applications from record linkage to geocoding and package delivery. Consequently, a lot of work has been dedicated to develop accurate address parsing techniques, with machine learning and neural network methods leading the state-of-the-art scoreboard. However, most of the work on address parsing has been confined to academic endeavours with little availability of free and easy-to-use open-source solutions.This paper presents Deepparse, a Python open-source, extendable, fine-tunable address parsing solution under LGPL-3.0 licence to parse multinational addresses using state-of-the-art deep learning algorithms and evaluated on over 60 countries. It can parse addresses written in any language and use any address standard. The pre-trained model achieves average 99% parsing accuracies on the countries used for training with no pre-processing nor post-processing needed. Moreover, the library supports fine-tuning with new data to generate a custom address parser.
%R 10.18653/v1/2023.nlposs-1.3
%U https://aclanthology.org/2023.nlposs-1.3
%U https://doi.org/10.18653/v1/2023.nlposs-1.3
%P 19-24
Markdown (Informal)
[Deepparse : An Extendable, and Fine-Tunable State-Of-The-Art Library for Parsing Multinational Street Addresses](https://aclanthology.org/2023.nlposs-1.3) (Beauchemin & Yassine, NLPOSS-WS 2023)
ACL