% Conference paper published in the Findings of ACL 2023 proceedings.
% Case-sensitive words in the title are brace-protected as whole words so that
% sentence-casing bibliography styles keep their capitalisation.
@inproceedings{paz-argaman-etal-2023-hegel,
  title     = {{HeGeL}: A Novel Dataset for Geo-Location from {Hebrew} Text},
  author    = {Paz-Argaman, Tzuf and
               Bauman, Tal and
               Mondshine, Itai and
               Omer, Itzhak and
               Dalyot, Sagi and
               Tsarfaty, Reut},
  editor    = {Rogers, Anna and
               Boyd-Graber, Jordan and
               Okazaki, Naoaki},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2023},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.findings-acl.460},
  doi       = {10.18653/v1/2023.findings-acl.460},
  pages     = {7311--7321},
  abstract  = {The task of textual geolocation {---} retrieving the coordinates of a place based on a free-form language description {---} calls for not only grounding but also natural language understanding and geospatial reasoning. Even though there are quite a few datasets in English used for geolocation, they are currently based on open-source data (Wikipedia and Twitter), where the location of the described place is mostly implicit, such that the location retrieval resolution is limited. Furthermore, there are no datasets available for addressing the problem of textual geolocation in morphologically rich and resource-poor languages, such as Hebrew. In this paper, we present the Hebrew Geo-Location (HeGeL) corpus, designed to collect literal place descriptions and analyze lingual geospatial reasoning. We crowdsourced 5,649 literal Hebrew place descriptions of various place types in three cities in Israel. Qualitative and empirical analysis show that the data exhibits abundant use of geospatial reasoning and requires a novel environmental representation.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, citekey paz-argaman-etal-2023-hegel. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="paz-argaman-etal-2023-hegel">
    <titleInfo>
      <title>HeGeL: A Novel Dataset for Geo-Location from Hebrew Text</title>
    </titleInfo>
    <!-- Authors, in the order listed. -->
    <name type="personal">
      <namePart type="given">Tzuf</namePart>
      <namePart type="family">Paz-Argaman</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tal</namePart>
      <namePart type="family">Bauman</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Itai</namePart>
      <namePart type="family">Mondshine</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Itzhak</namePart>
      <namePart type="family">Omer</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sagi</namePart>
      <namePart type="family">Dalyot</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Reut</namePart>
      <namePart type="family">Tsarfaty</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2023</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Anna</namePart>
        <namePart type="family">Rogers</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jordan</namePart>
        <namePart type="family">Boyd-Graber</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Naoaki</namePart>
        <namePart type="family">Okazaki</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Toronto, Canada</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The task of textual geolocation — retrieving the coordinates of a place based on a free-form language description — calls for not only grounding but also natural language understanding and geospatial reasoning. Even though there are quite a few datasets in English used for geolocation, they are currently based on open-source data (Wikipedia and Twitter), where the location of the described place is mostly implicit, such that the location retrieval resolution is limited. Furthermore, there are no datasets available for addressing the problem of textual geolocation in morphologically rich and resource-poor languages, such as Hebrew. In this paper, we present the Hebrew Geo-Location (HeGeL) corpus, designed to collect literal place descriptions and analyze lingual geospatial reasoning. We crowdsourced 5,649 literal Hebrew place descriptions of various place types in three cities in Israel. Qualitative and empirical analysis show that the data exhibits abundant use of geospatial reasoning and requires a novel environmental representation.</abstract>
    <!-- Identifiers and landing page. -->
    <identifier type="citekey">paz-argaman-etal-2023-hegel</identifier>
    <identifier type="doi">10.18653/v1/2023.findings-acl.460</identifier>
    <location>
      <url>https://aclanthology.org/2023.findings-acl.460</url>
    </location>
    <!-- Page range of the paper within the host volume. -->
    <part>
      <date>2023-07</date>
      <extent unit="page">
        <start>7311</start>
        <end>7321</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T HeGeL: A Novel Dataset for Geo-Location from Hebrew Text
%A Paz-Argaman, Tzuf
%A Bauman, Tal
%A Mondshine, Itai
%A Omer, Itzhak
%A Dalyot, Sagi
%A Tsarfaty, Reut
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Findings of the Association for Computational Linguistics: ACL 2023
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F paz-argaman-etal-2023-hegel
%X The task of textual geolocation — retrieving the coordinates of a place based on a free-form language description — calls for not only grounding but also natural language understanding and geospatial reasoning. Even though there are quite a few datasets in English used for geolocation, they are currently based on open-source data (Wikipedia and Twitter), where the location of the described place is mostly implicit, such that the location retrieval resolution is limited. Furthermore, there are no datasets available for addressing the problem of textual geolocation in morphologically rich and resource-poor languages, such as Hebrew. In this paper, we present the Hebrew Geo-Location (HeGeL) corpus, designed to collect literal place descriptions and analyze lingual geospatial reasoning. We crowdsourced 5,649 literal Hebrew place descriptions of various place types in three cities in Israel. Qualitative and empirical analysis show that the data exhibits abundant use of geospatial reasoning and requires a novel environmental representation.
%R 10.18653/v1/2023.findings-acl.460
%U https://aclanthology.org/2023.findings-acl.460
%U https://doi.org/10.18653/v1/2023.findings-acl.460
%P 7311-7321
Markdown (Informal)
[HeGeL: A Novel Dataset for Geo-Location from Hebrew Text](https://aclanthology.org/2023.findings-acl.460) (Paz-Argaman et al., Findings 2023)
ACL
- Tzuf Paz-Argaman, Tal Bauman, Itai Mondshine, Itzhak Omer, Sagi Dalyot, and Reut Tsarfaty. 2023. HeGeL: A Novel Dataset for Geo-Location from Hebrew Text. In Findings of the Association for Computational Linguistics: ACL 2023, pages 7311–7321, Toronto, Canada. Association for Computational Linguistics.