@inproceedings{hus-anastasopoulos-2026-rag,
title = "A {RAG} Approach for Typological Database Completion",
author = "Hus, Jonathan and
Anastasopoulos, Antonios",
editor = "Vylomova, Ekaterina and
Shcherbakov, Andrei and
Rani, Priya",
booktitle = "Proceedings of the 8th Workshop on Research in Computational Linguistic Typology and Multilingual {NLP}",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.sigtyp-main.7/",
pages = "39--49",
ISBN = "979-8-89176-374-6",
abstract = "Linguistic reference material is a trove of information that can be utilized for the analysis of languages. The material, in the form of grammar books and sketches, has been used for machine translation, but it can also be used for language analysis. Retrieval Augmented Generation (RAG) has been demonstrated to improve large language model (LLM) capabilities by incorporating external reference material into the generation process. In this paper, we investigate the use of grammar books and RAG techniques to identify language features. We use Grambank for feature definition and ground truth values, and we evaluate on five typologically diverse low-resource languages. We demonstrate that this approach can effectively make use of reference material."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hus-anastasopoulos-2026-rag">
<titleInfo>
<title>A RAG Approach for Typological Database Completion</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="family">Hus</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonios</namePart>
<namePart type="family">Anastasopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 8th Workshop on Research in Computational Linguistic Typology and Multilingual NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Vylomova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrei</namePart>
<namePart type="family">Shcherbakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Priya</namePart>
<namePart type="family">Rani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-374-6</identifier>
</relatedItem>
<abstract>Linguistic reference material is a trove of information that can be utilized for the analysis of languages. The material, in the form of grammar books and sketches, has been used for machine translation, but it can also be used for language analysis. Retrieval Augmented Generation (RAG) has been demonstrated to improve large language model (LLM) capabilities by incorporating external reference material into the generation process. In this paper, we investigate the use of grammar books and RAG techniques to identify language features. We use Grambank for feature definition and ground truth values, and we evaluate on five typologically diverse low-resource languages. We demonstrate that this approach can effectively make use of reference material.</abstract>
<identifier type="citekey">hus-anastasopoulos-2026-rag</identifier>
<location>
<url>https://aclanthology.org/2026.sigtyp-main.7/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>39</start>
<end>49</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A RAG Approach for Typological Database Completion
%A Hus, Jonathan
%A Anastasopoulos, Antonios
%Y Vylomova, Ekaterina
%Y Shcherbakov, Andrei
%Y Rani, Priya
%S Proceedings of the 8th Workshop on Research in Computational Linguistic Typology and Multilingual NLP
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-374-6
%F hus-anastasopoulos-2026-rag
%X Linguistic reference material is a trove of information that can be utilized for the analysis of languages. The material, in the form of grammar books and sketches, has been used for machine translation, but it can also be used for language analysis. Retrieval Augmented Generation (RAG) has been demonstrated to improve large language model (LLM) capabilities by incorporating external reference material into the generation process. In this paper, we investigate the use of grammar books and RAG techniques to identify language features. We use Grambank for feature definition and ground truth values, and we evaluate on five typologically diverse low-resource languages. We demonstrate that this approach can effectively make use of reference material.
%U https://aclanthology.org/2026.sigtyp-main.7/
%P 39-49
Markdown (Informal)
[A RAG Approach for Typological Database Completion](https://aclanthology.org/2026.sigtyp-main.7/) (Hus & Anastasopoulos, SIGTYP 2026)
ACL
- Jonathan Hus and Antonios Anastasopoulos. 2026. A RAG Approach for Typological Database Completion. In Proceedings of the 8th Workshop on Research in Computational Linguistic Typology and Multilingual NLP, pages 39–49, Rabat, Morocco. Association for Computational Linguistics.