% ACL Anthology record for the Jambu paper (20th SIGMORPHON workshop, 2023).
% Proper nouns and acronyms are brace-protected as whole words so that
% sentence-casing bibliography styles keep their capitalisation.
@inproceedings{arora-etal-2023-jambu,
    title     = {Jambu: A historical linguistic database for {South} {Asian} languages},
    author    = {Arora, Aryaman and
                 Farris, Adam and
                 Basu, Samopriya and
                 Kolichala, Suresh},
    editor    = {Nicolai, Garrett and
                 Chodroff, Eleanor and
                 Mailhot, Frederic and
                 {\c{C}}{\"o}ltekin, {\c{C}}a{\u{g}}r{\i}},
    booktitle = {Proceedings of the 20th {SIGMORPHON} workshop on Computational Research in Phonetics, Phonology, and Morphology},
    month     = jul,
    year      = {2023},
    address   = {Toronto, Canada},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2023.sigmorphon-1.8},
    doi       = {10.18653/v1/2023.sigmorphon-1.8},
    pages     = {68--77},
    abstract  = {We introduce JAMBU, a cognate database of South Asian languages which unifies dozens of previous sources in a structured and accessible format. The database includes nearly 287k lemmata from 602 lects, grouped together in 23k sets of cognates. We outline the data wrangling necessary to compile the dataset and train neural models for reflex prediction on the Indo-Aryan subset of the data. We hope that JAMBU is an invaluable resource for all historical linguists and Indologists, and look towards further improvement and expansion of the database.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="arora-etal-2023-jambu">
<titleInfo>
<title>Jambu: A historical linguistic database for South Asian languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aryaman</namePart>
<namePart type="family">Arora</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adam</namePart>
<namePart type="family">Farris</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samopriya</namePart>
<namePart type="family">Basu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Suresh</namePart>
<namePart type="family">Kolichala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th SIGMORPHON workshop on Computational Research in Phonetics, Phonology, and Morphology</title>
</titleInfo>
<name type="personal">
<namePart type="given">Garrett</namePart>
<namePart type="family">Nicolai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eleanor</namePart>
<namePart type="family">Chodroff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frederic</namePart>
<namePart type="family">Mailhot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Çağrı</namePart>
<namePart type="family">Çöltekin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We introduce JAMBU, a cognate database of South Asian languages which unifies dozens of previous sources in a structured and accessible format. The database includes nearly 287k lemmata from 602 lects, grouped together in 23k sets of cognates. We outline the data wrangling necessary to compile the dataset and train neural models for reflex prediction on the Indo-Aryan subset of the data. We hope that JAMBU is an invaluable resource for all historical linguists and Indologists, and look towards further improvement and expansion of the database.</abstract>
<identifier type="citekey">arora-etal-2023-jambu</identifier>
<identifier type="doi">10.18653/v1/2023.sigmorphon-1.8</identifier>
<location>
<url>https://aclanthology.org/2023.sigmorphon-1.8</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>68</start>
<end>77</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Jambu: A historical linguistic database for South Asian languages
%A Arora, Aryaman
%A Farris, Adam
%A Basu, Samopriya
%A Kolichala, Suresh
%Y Nicolai, Garrett
%Y Chodroff, Eleanor
%Y Mailhot, Frederic
%Y Çöltekin, Çağrı
%S Proceedings of the 20th SIGMORPHON workshop on Computational Research in Phonetics, Phonology, and Morphology
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F arora-etal-2023-jambu
%X We introduce JAMBU, a cognate database of South Asian languages which unifies dozens of previous sources in a structured and accessible format. The database includes nearly 287k lemmata from 602 lects, grouped together in 23k sets of cognates. We outline the data wrangling necessary to compile the dataset and train neural models for reflex prediction on the Indo-Aryan subset of the data. We hope that JAMBU is an invaluable resource for all historical linguists and Indologists, and look towards further improvement and expansion of the database.
%R 10.18653/v1/2023.sigmorphon-1.8
%U https://aclanthology.org/2023.sigmorphon-1.8
%U https://doi.org/10.18653/v1/2023.sigmorphon-1.8
%P 68-77
Markdown (Informal)
[Jambu: A historical linguistic database for South Asian languages](https://aclanthology.org/2023.sigmorphon-1.8) (Arora et al., SIGMORPHON 2023)
ACL
- Aryaman Arora, Adam Farris, Samopriya Basu, and Suresh Kolichala. 2023. Jambu: A historical linguistic database for South Asian languages. In Proceedings of the 20th SIGMORPHON workshop on Computational Research in Phonetics, Phonology, and Morphology, pages 68–77, Toronto, Canada. Association for Computational Linguistics.