@inproceedings{forkel-etal-2024-linguistic,
title = "Linguistic Survey of {I}ndia and Polyglotta Africana: Two Retrostandardized Digital Editions of Large Historical Collections of Multilingual Wordlists",
author = "Forkel, Robert and
List, Johann-Mattis and
Rzymski, Christoph and
Segerer, Guillaume",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.925/",
pages = "10578--10583",
abstract = "The Linguistic Survey of India (LSI) and the Polyglotta Africana (PA) are two of the largest historical collections of multilingual wordlists. While the originally printed editions have long since been digitized and shared in various forms, no editions in which the original data is presented in standardized form, comparable with contemporary wordlist collections, have been produced so far. Here we present digital retro-standardized editions of both sources. For maximal interoperability with datasets such as Lexibank the two datasets have been converted to CLDF, the standard proposed by the Cross-Linguistic Data Formats initiative. In this way, an unambiguous identification of the three main constituents of wordlist data {--} language, concept and segments used for transcription {--} is ensured through links to the respective reference catalogs, Glottolog, Concepticon and CLTS. At this level of interoperability, legacy material such as LSI and PA may provide a reasonable complementary source for language documentation, filling in gaps where original documentation is not possible anymore."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="forkel-etal-2024-linguistic">
<titleInfo>
<title>Linguistic Survey of India and Polyglotta Africana: Two Retrostandardized Digital Editions of Large Historical Collections of Multilingual Wordlists</title>
</titleInfo>
<name type="personal">
<namePart type="given">Robert</namePart>
<namePart type="family">Forkel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Johann-Mattis</namePart>
<namePart type="family">List</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christoph</namePart>
<namePart type="family">Rzymski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guillaume</namePart>
<namePart type="family">Segerer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The Linguistic Survey of India (LSI) and the Polyglotta Africana (PA) are two of the largest historical collections of multilingual wordlists. While the originally printed editions have long since been digitized and shared in various forms, no editions in which the original data is presented in standardized form, comparable with contemporary wordlist collections, have been produced so far. Here we present digital retro-standardized editions of both sources. For maximal interoperability with datasets such as Lexibank the two datasets have been converted to CLDF, the standard proposed by the Cross-Linguistic Data Formats initiative. In this way, an unambiguous identification of the three main constituents of wordlist data – language, concept and segments used for transcription – is ensured through links to the respective reference catalogs, Glottolog, Concepticon and CLTS. At this level of interoperability, legacy material such as LSI and PA may provide a reasonable complementary source for language documentation, filling in gaps where original documentation is not possible anymore.</abstract>
<identifier type="citekey">forkel-etal-2024-linguistic</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.925/</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>10578</start>
<end>10583</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Linguistic Survey of India and Polyglotta Africana: Two Retrostandardized Digital Editions of Large Historical Collections of Multilingual Wordlists
%A Forkel, Robert
%A List, Johann-Mattis
%A Rzymski, Christoph
%A Segerer, Guillaume
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F forkel-etal-2024-linguistic
%X The Linguistic Survey of India (LSI) and the Polyglotta Africana (PA) are two of the largest historical collections of multilingual wordlists. While the originally printed editions have long since been digitized and shared in various forms, no editions in which the original data is presented in standardized form, comparable with contemporary wordlist collections, have been produced so far. Here we present digital retro-standardized editions of both sources. For maximal interoperability with datasets such as Lexibank the two datasets have been converted to CLDF, the standard proposed by the Cross-Linguistic Data Formats initiative. In this way, an unambiguous identification of the three main constituents of wordlist data – language, concept and segments used for transcription – is ensured through links to the respective reference catalogs, Glottolog, Concepticon and CLTS. At this level of interoperability, legacy material such as LSI and PA may provide a reasonable complementary source for language documentation, filling in gaps where original documentation is not possible anymore.
%U https://aclanthology.org/2024.lrec-main.925/
%P 10578-10583
Markdown (Informal)
[Linguistic Survey of India and Polyglotta Africana: Two Retrostandardized Digital Editions of Large Historical Collections of Multilingual Wordlists](https://aclanthology.org/2024.lrec-main.925/) (Forkel et al., LREC-COLING 2024)
ACL