@inproceedings{r-etal-2025-field,
title = "Field to Model: Pairing Community Data Collection with Scalable {NLP} through the {L}i{FE} Suite",
author = "R, Karthick Narayanan and
Singh, Siddharth and
Singh, Saurabh and
Mathur, Aryan and
Kumar, Ritesh and
Ratan, Shyam and
Lahiri, Bornini and
Pareek, Benu and
Mathur, Neerav and
Gope, Amalesh and
Takhellambam, Meiraba and
Dawer, Yogesh",
editor = "Le Ferrand, {\'E}ric and
Klyachko, Elena and
Postnikova, Anna and
Shavrina, Tatiana and
Serikov, Oleg and
Voloshina, Ekaterina and
Vylomova, Ekaterina",
booktitle = "Proceedings of the Fourth Workshop on NLP Applications to Field Linguistics",
month = aug,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.fieldmatters-1.7/",
pages = "76--84",
ISBN = "979-8-89176-282-4",
abstract = "We present LiFE Suite as a ``Field-to-Model'' pipeline, designed to bridge community-centred data collection with scalable language model development. This paper describes the various tools integrated into the LiFE Suite that make this unified pipeline possible. Atekho, a mobile-first data collection platform, is designed to empower communities to assert their rights over their data. MATra-Lab, a web-based data processing and annotation tool, supports the management of field data and the creation of NLP-ready datasets with support from existing state-of-the-art NLP models. LiFE Model Studio, built on top of Hugging Face AutoTrain, offers a no-code solution for building scalable language models using the field data. This end-to-end integration ensures that every dataset collected in the field retains its linguistic, cultural, and metadata context, all the way through to deployable AI models and archive-ready datasets."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="r-etal-2025-field">
<titleInfo>
<title>Field to Model: Pairing Community Data Collection with Scalable NLP through the LiFE Suite</title>
</titleInfo>
<name type="personal">
<namePart type="given">Karthick</namePart>
<namePart type="given">Narayanan</namePart>
<namePart type="family">R</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Siddharth</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saurabh</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aryan</namePart>
<namePart type="family">Mathur</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ritesh</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shyam</namePart>
<namePart type="family">Ratan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bornini</namePart>
<namePart type="family">Lahiri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benu</namePart>
<namePart type="family">Pareek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Neerav</namePart>
<namePart type="family">Mathur</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amalesh</namePart>
<namePart type="family">Gope</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Meiraba</namePart>
<namePart type="family">Takhellambam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yogesh</namePart>
<namePart type="family">Dawer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on NLP Applications to Field Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Éric</namePart>
<namePart type="family">Le Ferrand</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elena</namePart>
<namePart type="family">Klyachko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Postnikova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tatiana</namePart>
<namePart type="family">Shavrina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oleg</namePart>
<namePart type="family">Serikov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Voloshina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Vylomova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-282-4</identifier>
</relatedItem>
<abstract>We present LiFE Suite as a “Field-to-Model” pipeline, designed to bridge community-centred data collection with scalable language model development. This paper describes the various tools integrated into the LiFE Suite that make this unified pipeline possible. Atekho, a mobile-first data collection platform, is designed to empower communities to assert their rights over their data. MATra-Lab, a web-based data processing and annotation tool, supports the management of field data and the creation of NLP-ready datasets with support from existing state-of-the-art NLP models. LiFE Model Studio, built on top of Hugging Face AutoTrain, offers a no-code solution for building scalable language models using the field data. This end-to-end integration ensures that every dataset collected in the field retains its linguistic, cultural, and metadata context, all the way through to deployable AI models and archive-ready datasets.</abstract>
<identifier type="citekey">r-etal-2025-field</identifier>
<location>
<url>https://aclanthology.org/2025.fieldmatters-1.7/</url>
</location>
<part>
<date>2025-08</date>
<extent unit="page">
<start>76</start>
<end>84</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Field to Model: Pairing Community Data Collection with Scalable NLP through the LiFE Suite
%A R, Karthick Narayanan
%A Singh, Siddharth
%A Singh, Saurabh
%A Mathur, Aryan
%A Kumar, Ritesh
%A Ratan, Shyam
%A Lahiri, Bornini
%A Pareek, Benu
%A Mathur, Neerav
%A Gope, Amalesh
%A Takhellambam, Meiraba
%A Dawer, Yogesh
%Y Le Ferrand, Éric
%Y Klyachko, Elena
%Y Postnikova, Anna
%Y Shavrina, Tatiana
%Y Serikov, Oleg
%Y Voloshina, Ekaterina
%Y Vylomova, Ekaterina
%S Proceedings of the Fourth Workshop on NLP Applications to Field Linguistics
%D 2025
%8 August
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-282-4
%F r-etal-2025-field
%X We present LiFE Suite as a “Field-to-Model” pipeline, designed to bridge community-centred data collection with scalable language model development. This paper describes the various tools integrated into the LiFE Suite that make this unified pipeline possible. Atekho, a mobile-first data collection platform, is designed to empower communities to assert their rights over their data. MATra-Lab, a web-based data processing and annotation tool, supports the management of field data and the creation of NLP-ready datasets with support from existing state-of-the-art NLP models. LiFE Model Studio, built on top of Hugging Face AutoTrain, offers a no-code solution for building scalable language models using the field data. This end-to-end integration ensures that every dataset collected in the field retains its linguistic, cultural, and metadata context, all the way through to deployable AI models and archive-ready datasets.
%U https://aclanthology.org/2025.fieldmatters-1.7/
%P 76-84
Markdown (Informal)
[Field to Model: Pairing Community Data Collection with Scalable NLP through the LiFE Suite](https://aclanthology.org/2025.fieldmatters-1.7/) (R et al., FieldMatters 2025)
ACL
- Karthick Narayanan R, Siddharth Singh, Saurabh Singh, Aryan Mathur, Ritesh Kumar, Shyam Ratan, Bornini Lahiri, Benu Pareek, Neerav Mathur, Amalesh Gope, Meiraba Takhellambam, and Yogesh Dawer. 2025. Field to Model: Pairing Community Data Collection with Scalable NLP through the LiFE Suite. In Proceedings of the Fourth Workshop on NLP Applications to Field Linguistics, pages 76–84, Vienna, Austria. Association for Computational Linguistics.