@inproceedings{haynie-etal-2023-grambanks,
title = "Grambank{'}s Typological Advances Support Computational Research on Diverse Languages",
author = "Haynie, Hannah J. and
Blasi, Dami{\'a}n and
Skirg{\aa}rd, Hedvig and
Greenhill, Simon J. and
Atkinson, Quentin D. and
Gray, Russell D.",
editor = "Beinborn, Lisa and
Goswami, Koustava and
Murado{\u{g}}lu, Saliha and
Sorokin, Alexey and
Kumar, Ritesh and
Shcherbakov, Andreas and
Ponti, Edoardo M. and
Cotterell, Ryan and
Vylomova, Ekaterina",
booktitle = "Proceedings of the 5th Workshop on Research in Computational Linguistic Typology and Multilingual NLP",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.sigtyp-1.17",
doi = "10.18653/v1/2023.sigtyp-1.17",
pages = "147--149",
abstract = "Of approximately 7,000 languages around the world, only a handful have abundant computational resources. Extending the reach of language technologies to diverse, less-resourced languages is important for tackling the challenges of digital equity and inclusion. Here we introduce the Grambank typological database as a resource to support such efforts. To date, work that uses typological data to extend computational research to less-resourced languages has relied on cross-linguistic morphosyntax datasets that are sparsely populated, use categorical coding that can be difficult to interpret, and introduce redundant information across features. Grambank presents similar information (e.g. word order, grammatical relation marking, constructions like interrogatives and negation), but is designed to avoid several disadvantages of legacy typological resources. Grambank{'}s 195 features encode basic information about morphology and syntax for 2,467 languages. 83{\%} of these languages are annotated for at least 100 features. By implementing binary coding for most features and curating the dataset to avoid logical dependencies, Grambank presents information in a user-friendly format for computational applications. The scale, completeness, reliability, format, and documentation of Grambank make it a useful resource for linguistically-informed models, cross-lingual NLP, and research targeting less-resourced languages.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="haynie-etal-2023-grambanks">
<titleInfo>
<title>Grambank’s Typological Advances Support Computational Research on Diverse Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hannah</namePart>
<namePart type="given">J</namePart>
<namePart type="family">Haynie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Damián</namePart>
<namePart type="family">Blasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hedvig</namePart>
<namePart type="family">Skirgård</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="given">J</namePart>
<namePart type="family">Greenhill</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Quentin</namePart>
<namePart type="given">D</namePart>
<namePart type="family">Atkinson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Russell</namePart>
<namePart type="given">D</namePart>
<namePart type="family">Gray</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 5th Workshop on Research in Computational Linguistic Typology and Multilingual NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lisa</namePart>
<namePart type="family">Beinborn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Koustava</namePart>
<namePart type="family">Goswami</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saliha</namePart>
<namePart type="family">Muradoğlu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexey</namePart>
<namePart type="family">Sorokin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ritesh</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Shcherbakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Edoardo</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Ponti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Cotterell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Vylomova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Of approximately 7,000 languages around the world, only a handful have abundant computational resources. Extending the reach of language technologies to diverse, less-resourced languages is important for tackling the challenges of digital equity and inclusion. Here we introduce the Grambank typological database as a resource to support such efforts. To date, work that uses typological data to extend computational research to less-resourced languages has relied on cross-linguistic morphosyntax datasets that are sparsely populated, use categorical coding that can be difficult to interpret, and introduce redundant information across features. Grambank presents similar information (e.g. word order, grammatical relation marking, constructions like interrogatives and negation), but is designed to avoid several disadvantages of legacy typological resources. Grambank’s 195 features encode basic information about morphology and syntax for 2,467 languages. 83% of these languages are annotated for at least 100 features. By implementing binary coding for most features and curating the dataset to avoid logical dependencies, Grambank presents information in a user-friendly format for computational applications. The scale, completeness, reliability, format, and documentation of Grambank make it a useful resource for linguistically-informed models, cross-lingual NLP, and research targeting less-resourced languages.</abstract>
<identifier type="citekey">haynie-etal-2023-grambanks</identifier>
<identifier type="doi">10.18653/v1/2023.sigtyp-1.17</identifier>
<location>
<url>https://aclanthology.org/2023.sigtyp-1.17</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>147</start>
<end>149</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Grambank’s Typological Advances Support Computational Research on Diverse Languages
%A Haynie, Hannah J.
%A Blasi, Damián
%A Skirgård, Hedvig
%A Greenhill, Simon J.
%A Atkinson, Quentin D.
%A Gray, Russell D.
%Y Beinborn, Lisa
%Y Goswami, Koustava
%Y Muradoğlu, Saliha
%Y Sorokin, Alexey
%Y Kumar, Ritesh
%Y Shcherbakov, Andreas
%Y Ponti, Edoardo M.
%Y Cotterell, Ryan
%Y Vylomova, Ekaterina
%S Proceedings of the 5th Workshop on Research in Computational Linguistic Typology and Multilingual NLP
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F haynie-etal-2023-grambanks
%X Of approximately 7,000 languages around the world, only a handful have abundant computational resources. Extending the reach of language technologies to diverse, less-resourced languages is important for tackling the challenges of digital equity and inclusion. Here we introduce the Grambank typological database as a resource to support such efforts. To date, work that uses typological data to extend computational research to less-resourced languages has relied on cross-linguistic morphosyntax datasets that are sparsely populated, use categorical coding that can be difficult to interpret, and introduce redundant information across features. Grambank presents similar information (e.g. word order, grammatical relation marking, constructions like interrogatives and negation), but is designed to avoid several disadvantages of legacy typological resources. Grambank’s 195 features encode basic information about morphology and syntax for 2,467 languages. 83% of these languages are annotated for at least 100 features. By implementing binary coding for most features and curating the dataset to avoid logical dependencies, Grambank presents information in a user-friendly format for computational applications. The scale, completeness, reliability, format, and documentation of Grambank make it a useful resource for linguistically-informed models, cross-lingual NLP, and research targeting less-resourced languages.
%R 10.18653/v1/2023.sigtyp-1.17
%U https://aclanthology.org/2023.sigtyp-1.17
%U https://doi.org/10.18653/v1/2023.sigtyp-1.17
%P 147-149
Markdown (Informal)
[Grambank’s Typological Advances Support Computational Research on Diverse Languages](https://aclanthology.org/2023.sigtyp-1.17) (Haynie et al., SIGTYP 2023)
ACL