@comment{Conference paper entry. Case-sensitive words in title/booktitle are
brace-protected as whole words so sentence-casing styles keep their capitals.}
@inproceedings{han-etal-2026-camoquery,
    title = "{CamoQuery}: Language-Guided Reasoning Camouflaged Object Segmentation",
    author = "Han, Tianxin and
      Dong, Qing and
      Wang, Xingwei and
      Jia, Jie and
      Wu, Gang and
      Yang, Bowen and
      Zhang, Fu",
    editor = "Liakata, Maria and
      Moreira, Viviane P. and
      Zhang, Jiajun and
      Jurgens, David",
    booktitle = "Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)",
    month = jul,
    year = "2026",
    address = "San Diego, California, United States",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2026.acl-long.1050/",
    pages = "22924--22941",
    isbn = "979-8-89176-390-6",
    abstract = "Although camouflaged object segmentation has advanced rapidly in recent years, existing methods are still confined to visual mask prediction under fixed task assumptions. They cannot interactively respond to user requests, nor can they proactively understand and reason about the user{'}s intent. Our work tackles this issue by proposing a novel task, Language-Guided Reasoning Camouflaged Object Segmentation (LRCOS). Given a camouflaged image and an implicit query text instruction that requires reasoning, LRCOS aims to output an intent-consistent segmentation mask. To establish a benchmark for this task, we build CamoQuery, comprising 12,437 image{--}mask samples and 25,971 implicit query text instructions. To better reflect real-world camouflaged scenarios, we additionally collect MCD, a multi-instance camouflage dataset where multiple camouflaged targets co-exist within the same scene, increasing the need for reasoning. Building on CamoQuery, we further propose COSA, a vision{--}language segmentation assistant that segments the intended camouflaged object from implicit queries and produces a reasoning explanation. Experiments on CamoQuery demonstrate that COSA has strong reasoning segmentation capability in camouflaged scenes and exhibits zero-shot capability."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="han-etal-2026-camoquery">
<titleInfo>
<title>CamoQuery: Language-Guided Reasoning Camouflaged Object Segmentation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tianxin</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qing</namePart>
<namePart type="family">Dong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xingwei</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jie</namePart>
<namePart type="family">Jia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gang</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bowen</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fu</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-390-6</identifier>
</relatedItem>
<abstract>Although camouflaged object segmentation has advanced rapidly in recent years, existing methods are still confined to visual mask prediction under fixed task assumptions. They cannot interactively respond to user requests, nor can they proactively understand and reason about the user’s intent. Our work tackles this issue by proposing a novel task, Language-Guided Reasoning Camouflaged Object Segmentation (LRCOS). Given a camouflaged image and an implicit query text instruction that requires reasoning, LRCOS aims to output an intent-consistent segmentation mask. To establish a benchmark for this task, we build CamoQuery, comprising 12,437 image–mask samples and 25,971 implicit query text instructions. To better reflect real-world camouflaged scenarios, we additionally collect MCD, a multi-instance camouflage dataset where multiple camouflaged targets co-exist within the same scene, increasing the need for reasoning. Building on CamoQuery, we further propose COSA, a vision–language segmentation assistant that segments the intended camouflaged object from implicit queries and produces a reasoning explanation. Experiments on CamoQuery demonstrate that COSA has strong reasoning segmentation capability in camouflaged scenes and exhibits zero-shot capability.</abstract>
<identifier type="citekey">han-etal-2026-camoquery</identifier>
<location>
<url>https://aclanthology.org/2026.acl-long.1050/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>22924</start>
<end>22941</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CamoQuery: Language-Guided Reasoning Camouflaged Object Segmentation
%A Han, Tianxin
%A Dong, Qing
%A Wang, Xingwei
%A Jia, Jie
%A Wu, Gang
%A Yang, Bowen
%A Zhang, Fu
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-390-6
%F han-etal-2026-camoquery
%X Although camouflaged object segmentation has advanced rapidly in recent years, existing methods are still confined to visual mask prediction under fixed task assumptions. They cannot interactively respond to user requests, nor can they proactively understand and reason about the user’s intent. Our work tackles this issue by proposing a novel task, Language-Guided Reasoning Camouflaged Object Segmentation (LRCOS). Given a camouflaged image and an implicit query text instruction that requires reasoning, LRCOS aims to output an intent-consistent segmentation mask. To establish a benchmark for this task, we build CamoQuery, comprising 12,437 image–mask samples and 25,971 implicit query text instructions. To better reflect real-world camouflaged scenarios, we additionally collect MCD, a multi-instance camouflage dataset where multiple camouflaged targets co-exist within the same scene, increasing the need for reasoning. Building on CamoQuery, we further propose COSA, a vision–language segmentation assistant that segments the intended camouflaged object from implicit queries and produces a reasoning explanation. Experiments on CamoQuery demonstrate that COSA has strong reasoning segmentation capability in camouflaged scenes and exhibits zero-shot capability.
%U https://aclanthology.org/2026.acl-long.1050/
%P 22924-22941
Markdown (Informal)
[CamoQuery: Language-Guided Reasoning Camouflaged Object Segmentation](https://aclanthology.org/2026.acl-long.1050/) (Han et al., ACL 2026)
ACL
- Tianxin Han, Qing Dong, Xingwei Wang, Jie Jia, Gang Wu, Bowen Yang, and Fu Zhang. 2026. CamoQuery: Language-Guided Reasoning Camouflaged Object Segmentation. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 22924–22941, San Diego, California, United States. Association for Computational Linguistics.