@inproceedings{cui-etal-2025-dasr,
title = "{DASR}: Distributed Adaptive Scene Recognition - A Multi-Agent Cloud-Edge Framework for Language-Guided Scene Detection",
author = "Cui, Can and
Liu, Yongkang and
Ucar, Seyhan and
Peng, Juntong and
Moradipari, Ahmadreza and
Khabazi, Maryam and
Wang, Ziran",
editor = "Potdar, Saloni and
Rojas-Barahona, Lina and
Montella, Sebastien",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track",
month = nov,
year = "2025",
address = "Suzhou (China)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.emnlp-industry.57/",
pages = "850--858",
ISBN = "979-8-89176-333-3",
abstract = "The increasing complexity of modern driving systems demands efficient collection and analysis of specific driving scenarios that are crucial for system development and validation. Current approaches either rely on massive data collection followed by manual filtering, or rigid threshold-based recording systems that often miss important edge cases. In this paper, we present Distributed Adaptive Scene Recognition (DASR), a novel multi-agent cloud-edge framework for language-guided scene detection in connected vehicles. Our system leverages the complementary strengths of cloud-based large language models and edge-deployed vision language models to intelligently identify and preserve relevant driving scenarios while optimizing limited on-vehicle buffer storage. The cloud-based LLM serves as an intelligent coordinator that analyzes developer prompts to determine which specialized tools and sensor data streams should be incorporated, while the edge-deployed VLM efficiently processes video streams in real time to make relevant decisions. Extensive experiments across multiple driving datasets demonstrate that our framework achieves superior performance compared to larger baseline models, with exceptional performance on complex driving tasks requiring sophisticated reasoning. DASR also shows strong generalization capabilities on out-of-distribution datasets and significantly reduces storage requirements (28.73 {\%}) compared to baseline methods."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cui-etal-2025-dasr">
<titleInfo>
<title>DASR: Distributed Adaptive Scene Recognition - A Multi-Agent Cloud-Edge Framework for Language-Guided Scene Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Can</namePart>
<namePart type="family">Cui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yongkang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seyhan</namePart>
<namePart type="family">Ucar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juntong</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmadreza</namePart>
<namePart type="family">Moradipari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maryam</namePart>
<namePart type="family">Khabazi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ziran</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saloni</namePart>
<namePart type="family">Potdar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lina</namePart>
<namePart type="family">Rojas-Barahona</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastien</namePart>
<namePart type="family">Montella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou (China)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-333-3</identifier>
</relatedItem>
<abstract>The increasing complexity of modern driving systems demands efficient collection and analysis of specific driving scenarios that are crucial for system development and validation. Current approaches either rely on massive data collection followed by manual filtering, or rigid threshold-based recording systems that often miss important edge cases. In this paper, we present Distributed Adaptive Scene Recognition (DASR), a novel multi-agent cloud-edge framework for language-guided scene detection in connected vehicles. Our system leverages the complementary strengths of cloud-based large language models and edge-deployed vision language models to intelligently identify and preserve relevant driving scenarios while optimizing limited on-vehicle buffer storage. The cloud-based LLM serves as an intelligent coordinator that analyzes developer prompts to determine which specialized tools and sensor data streams should be incorporated, while the edge-deployed VLM efficiently processes video streams in real time to make relevant decisions. Extensive experiments across multiple driving datasets demonstrate that our framework achieves superior performance compared to larger baseline models, with exceptional performance on complex driving tasks requiring sophisticated reasoning. DASR also shows strong generalization capabilities on out-of-distribution datasets and significantly reduces storage requirements (28.73 %) compared to baseline methods.</abstract>
<identifier type="citekey">cui-etal-2025-dasr</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-industry.57/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>850</start>
<end>858</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DASR: Distributed Adaptive Scene Recognition - A Multi-Agent Cloud-Edge Framework for Language-Guided Scene Detection
%A Cui, Can
%A Liu, Yongkang
%A Ucar, Seyhan
%A Peng, Juntong
%A Moradipari, Ahmadreza
%A Khabazi, Maryam
%A Wang, Ziran
%Y Potdar, Saloni
%Y Rojas-Barahona, Lina
%Y Montella, Sebastien
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou (China)
%@ 979-8-89176-333-3
%F cui-etal-2025-dasr
%X The increasing complexity of modern driving systems demands efficient collection and analysis of specific driving scenarios that are crucial for system development and validation. Current approaches either rely on massive data collection followed by manual filtering, or rigid threshold-based recording systems that often miss important edge cases. In this paper, we present Distributed Adaptive Scene Recognition (DASR), a novel multi-agent cloud-edge framework for language-guided scene detection in connected vehicles. Our system leverages the complementary strengths of cloud-based large language models and edge-deployed vision language models to intelligently identify and preserve relevant driving scenarios while optimizing limited on-vehicle buffer storage. The cloud-based LLM serves as an intelligent coordinator that analyzes developer prompts to determine which specialized tools and sensor data streams should be incorporated, while the edge-deployed VLM efficiently processes video streams in real time to make relevant decisions. Extensive experiments across multiple driving datasets demonstrate that our framework achieves superior performance compared to larger baseline models, with exceptional performance on complex driving tasks requiring sophisticated reasoning. DASR also shows strong generalization capabilities on out-of-distribution datasets and significantly reduces storage requirements (28.73 %) compared to baseline methods.
%U https://aclanthology.org/2025.emnlp-industry.57/
%P 850-858
Markdown (Informal)
[DASR: Distributed Adaptive Scene Recognition - A Multi-Agent Cloud-Edge Framework for Language-Guided Scene Detection](https://aclanthology.org/2025.emnlp-industry.57/) (Cui et al., EMNLP 2025)
ACL
Can Cui, Yongkang Liu, Seyhan Ucar, Juntong Peng, Ahmadreza Moradipari, Maryam Khabazi, and Ziran Wang. 2025. DASR: Distributed Adaptive Scene Recognition - A Multi-Agent Cloud-Edge Framework for Language-Guided Scene Detection. In Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track, pages 850–858, Suzhou (China). Association for Computational Linguistics.
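
The abstract describes a cloud-edge split: a cloud-side LLM turns a developer prompt into a specification of relevant tools and sensor streams, and an edge-side VLM scores incoming video against that specification to decide what to keep in a limited on-vehicle buffer. The following is a minimal, hypothetical Python sketch of that coordination loop; every function name, field, prompt, and the ring-buffer policy is an illustrative assumption, not the authors' implementation.

```python
# Hypothetical sketch of the cloud-edge coordination loop described in the abstract.
# All names and the buffering policy are assumptions for illustration only.
from collections import deque
from dataclasses import dataclass, field


@dataclass
class SceneSpec:
    """Scene specification produced by the cloud-side coordinator."""
    description: str                                  # natural-language scene of interest
    sensors: list = field(default_factory=list)       # sensor streams to include
    tools: list = field(default_factory=list)         # specialized tools to invoke


def cloud_coordinator(developer_prompt: str) -> SceneSpec:
    """Stand-in for the cloud LLM: map a developer prompt to a scene spec.

    A deployed system would call an LLM here; this stub hard-codes a
    plausible mapping purely for illustration.
    """
    return SceneSpec(
        description=developer_prompt,
        sensors=["front_camera", "can_bus_speed"],
        tools=["weather_lookup"],
    )


def edge_vlm_score(frame, spec: SceneSpec) -> float:
    """Stand-in for the edge VLM: score how well a frame matches the spec (0..1)."""
    # Placeholder: an on-vehicle VLM would evaluate the frame against
    # spec.description and the selected sensor readings.
    return 0.0


def run_edge_loop(video_stream, spec: SceneSpec,
                  buffer_size: int = 300, keep_threshold: float = 0.8):
    """Keep only frames whose relevance score clears a threshold, bounded by a ring buffer."""
    buffer = deque(maxlen=buffer_size)   # models the limited on-vehicle buffer
    for frame in video_stream:
        if edge_vlm_score(frame, spec) >= keep_threshold:
            buffer.append(frame)         # preserve the relevant scenario data
    return list(buffer)


if __name__ == "__main__":
    spec = cloud_coordinator("Find near-miss events at unprotected left turns in rain")
    kept = run_edge_loop(video_stream=[], spec=spec)
    print(f"kept {len(kept)} frames for spec: {spec.description!r}")
```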