@comment{
  ACL Anthology record for the RANLP 2025 paper by Ghetoiu and Nisioi.
  Title: proper nouns and the acronym are brace-protected as whole words so
  sentence-casing styles keep their capitals.
  NOTE(review): "address" appears to hold the conference venue, while the
  publisher's city is embedded in the "publisher" field; confirm before
  normalising either field.
}
@inproceedings{ghetoiu-nisioi-2025-graph,
    title = "Graph-based {RAG} for Low-Resource {Aromanian}--{Romanian} Translation",
    author = "Ghetoiu, Laurentiu G. and
      Nisioi, Sergiu",
    editor = "Angelova, Galia and
      Kunilovskaya, Maria and
      Escribe, Marie and
      Mitkov, Ruslan",
    booktitle = "Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era",
    month = sep,
    year = "2025",
    address = "Varna, Bulgaria",
    publisher = "INCOMA Ltd., Shoumen, Bulgaria",
    url = "https://aclanthology.org/2025.ranlp-1.47/",
    pages = "388--394",
    abstract = "Aromanian, a linguistically and culturally significant yet low-resource Romance language, poses substantial challenges in computational linguistic research due to its limited NLP resources and non-standardized orthography. In this paper, we present an experimental study aimed at translating Aromanian texts into Romanian using a variety of modern NLP methodologies. We leverage two key resources: a parallel corpus consisting of approximately 3,000 sentence-aligned short stories and a dictionary of over 28,000 Aromanian-Romanian word pairs. Our approaches include Retrieval-Augmented Generation (RAG) supported by a graph-based alignment database, fine-tuning multilingual transformer models (specifically Meta{'}s NLLB), and parameter-efficient fine-tuning techniques such as LoRA applied to LLaMA-derived models. Evaluations using standard metrics (BLEU, chrF) demonstrate varied effectiveness across these methodologies, highlighting the strong performance of NLLB for general translation tasks, while RAG excels in translating familiar content. Our findings underline the complexities inherent in low-resource language translation and provide valuable insights into effective digital preservation and NLP adaptation strategies for underrepresented languages."
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey ghetoiu-nisioi-2025-graph: the paper's own
     metadata sits directly under <mods>; the containing proceedings volume
     (title, editors, publisher, place) is described in <relatedItem type="host">. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="ghetoiu-nisioi-2025-graph">
    <titleInfo>
      <title>Graph-based RAG for Low-Resource Aromanian–Romanian Translation</title>
    </titleInfo>
    <!-- Authors of the paper -->
    <name type="personal">
      <namePart type="given">Laurentiu</namePart>
      <namePart type="given">G</namePart>
      <namePart type="family">Ghetoiu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sergiu</namePart>
      <namePart type="family">Nisioi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume and its editors -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Galia</namePart>
        <namePart type="family">Angelova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Kunilovskaya</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marie</namePart>
        <namePart type="family">Escribe</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Aromanian, a linguistically and culturally significant yet low-resource Romance language, poses substantial challenges in computational linguistic research due to its limited NLP resources and non-standardized orthography. In this paper, we present an experimental study aimed at translating Aromanian texts into Romanian using a variety of modern NLP methodologies. We leverage two key resources: a parallel corpus consisting of approximately 3,000 sentence-aligned short stories and a dictionary of over 28,000 Aromanian-Romanian word pairs. Our approaches include Retrieval-Augmented Generation (RAG) supported by a graph-based alignment database, fine-tuning multilingual transformer models (specifically Meta’s NLLB), and parameter-efficient fine-tuning techniques such as LoRA applied to LLaMA-derived models. Evaluations using standard metrics (BLEU, chrF) demonstrate varied effectiveness across these methodologies, highlighting the strong performance of NLLB for general translation tasks, while RAG excels in translating familiar content. Our findings underline the complexities inherent in low-resource language translation and provide valuable insights into effective digital preservation and NLP adaptation strategies for underrepresented languages.</abstract>
    <identifier type="citekey">ghetoiu-nisioi-2025-graph</identifier>
    <location>
      <url>https://aclanthology.org/2025.ranlp-1.47/</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2025-09</date>
      <extent unit="page">
        <start>388</start>
        <end>394</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Graph-based RAG for Low-Resource Aromanian–Romanian Translation
%A Ghetoiu, Laurentiu G.
%A Nisioi, Sergiu
%Y Angelova, Galia
%Y Kunilovskaya, Maria
%Y Escribe, Marie
%Y Mitkov, Ruslan
%S Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F ghetoiu-nisioi-2025-graph
%X Aromanian, a linguistically and culturally significant yet low-resource Romance language, poses substantial challenges in computational linguistic research due to its limited NLP resources and non-standardized orthography. In this paper, we present an experimental study aimed at translating Aromanian texts into Romanian using a variety of modern NLP methodologies. We leverage two key resources: a parallel corpus consisting of approximately 3,000 sentence-aligned short stories and a dictionary of over 28,000 Aromanian-Romanian word pairs. Our approaches include Retrieval-Augmented Generation (RAG) supported by a graph-based alignment database, fine-tuning multilingual transformer models (specifically Meta’s NLLB), and parameter-efficient fine-tuning techniques such as LoRA applied to LLaMA-derived models. Evaluations using standard metrics (BLEU, chrF) demonstrate varied effectiveness across these methodologies, highlighting the strong performance of NLLB for general translation tasks, while RAG excels in translating familiar content. Our findings underline the complexities inherent in low-resource language translation and provide valuable insights into effective digital preservation and NLP adaptation strategies for underrepresented languages.
%U https://aclanthology.org/2025.ranlp-1.47/
%P 388-394
Markdown (Informal)
[Graph-based RAG for Low-Resource Aromanian–Romanian Translation](https://aclanthology.org/2025.ranlp-1.47/) (Ghetoiu & Nisioi, RANLP 2025)
ACL
- Laurentiu G. Ghetoiu and Sergiu Nisioi. 2025. Graph-based RAG for Low-Resource Aromanian–Romanian Translation. In Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era, pages 388–394, Varna, Bulgaria. INCOMA Ltd., Shoumen, Bulgaria.