@inproceedings{liu-best-2025-survey,
title = "A Survey of {NLP} Progress in {S}ino-{T}ibetan Low-Resource Languages",
author = "Liu, Shuheng and
Best, Michael",
editor = "Chiruzzo, Luis and
Ritter, Alan and
Wang, Lu",
booktitle = "Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)",
month = apr,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.naacl-long.396/",
doi = "10.18653/v1/2025.naacl-long.396",
pages = "7804--7825",
ISBN = "979-8-89176-189-6",
abstract = "Despite the increasing effort in including more low-resource languages in NLP/CL development, most of the world{'}s languages are still absent. In this paper, we take the example of the Sino-Tibetan language family which consists of hundreds of low-resource languages, and we look at the representation of these low-resource languages in papers archived on ACL Anthology. Our findings indicate that while more techniques and discussions on more languages are present in more publication venues over the years, the overall focus on this language family has been minimal. The lack of attention might be owing to the small number of native speakers and governmental support of these languages. The current development of large language models, albeit successful in a few quintessential rich-resource languages, are still trailing when tackling these low-resource languages. Our paper calls for the attention in NLP/CL research on the inclusion of low-resource languages, especially as increasing resources are poured into the development of data-driven language models."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="liu-best-2025-survey">
<titleInfo>
<title>A Survey of NLP Progress in Sino-Tibetan Low-Resource Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shuheng</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Best</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Luis</namePart>
<namePart type="family">Chiruzzo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Ritter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lu</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-189-6</identifier>
</relatedItem>
<abstract>Despite the increasing effort in including more low-resource languages in NLP/CL development, most of the world’s languages are still absent. In this paper, we take the example of the Sino-Tibetan language family which consists of hundreds of low-resource languages, and we look at the representation of these low-resource languages in papers archived on ACL Anthology. Our findings indicate that while more techniques and discussions on more languages are present in more publication venues over the years, the overall focus on this language family has been minimal. The lack of attention might be owing to the small number of native speakers and governmental support of these languages. The current development of large language models, albeit successful in a few quintessential rich-resource languages, are still trailing when tackling these low-resource languages. Our paper calls for the attention in NLP/CL research on the inclusion of low-resource languages, especially as increasing resources are poured into the development of data-driven language models.</abstract>
<identifier type="citekey">liu-best-2025-survey</identifier>
<identifier type="doi">10.18653/v1/2025.naacl-long.396</identifier>
<location>
<url>https://aclanthology.org/2025.naacl-long.396/</url>
</location>
<part>
<date>2025-04</date>
<extent unit="page">
<start>7804</start>
<end>7825</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Survey of NLP Progress in Sino-Tibetan Low-Resource Languages
%A Liu, Shuheng
%A Best, Michael
%Y Chiruzzo, Luis
%Y Ritter, Alan
%Y Wang, Lu
%S Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)
%D 2025
%8 April
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-189-6
%F liu-best-2025-survey
%X Despite the increasing effort in including more low-resource languages in NLP/CL development, most of the world’s languages are still absent. In this paper, we take the example of the Sino-Tibetan language family which consists of hundreds of low-resource languages, and we look at the representation of these low-resource languages in papers archived on ACL Anthology. Our findings indicate that while more techniques and discussions on more languages are present in more publication venues over the years, the overall focus on this language family has been minimal. The lack of attention might be owing to the small number of native speakers and governmental support of these languages. The current development of large language models, albeit successful in a few quintessential rich-resource languages, are still trailing when tackling these low-resource languages. Our paper calls for the attention in NLP/CL research on the inclusion of low-resource languages, especially as increasing resources are poured into the development of data-driven language models.
%R 10.18653/v1/2025.naacl-long.396
%U https://aclanthology.org/2025.naacl-long.396/
%U https://doi.org/10.18653/v1/2025.naacl-long.396
%P 7804-7825
Markdown (Informal)
[A Survey of NLP Progress in Sino-Tibetan Low-Resource Languages](https://aclanthology.org/2025.naacl-long.396/) (Liu & Best, NAACL 2025)
ACL
- Shuheng Liu and Michael Best. 2025. A Survey of NLP Progress in Sino-Tibetan Low-Resource Languages. In Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pages 7804–7825, Albuquerque, New Mexico. Association for Computational Linguistics.