@inproceedings{jiangkuo-etal-2024-chinese,
  title     = {{C}hinese Vision-Language Understanding Evaluation},
  author    = {Wang, Jiangkuo and
               Zheng, Linwei and
               Chen, Kehai and
               Bai, Xuefeng and
               Zhang, Min},
  editor    = {Lin, Hongfei and
               Tan, Hongye and
               Li, Bin},
  booktitle = {Proceedings of the 23rd Chinese National Conference on Computational Linguistics (Volume 3: Evaluations)},
  month     = jul,
  year      = {2024},
  address   = {Taiyuan, China},
  publisher = {Chinese Information Processing Society of China},
  url       = {https://aclanthology.org/2024.ccl-3.41/},
  pages     = {363--373},
  language  = {eng},
  abstract  = {This paper introduces our systems submitted for the Chinese Vision-Language Understanding Evaluation task at the 23rd Chinese Computational Linguistics Conference. In this competition, we utilized X2-VLM and CCLM models to participate in various subtasks such as image-text retrieval, visual grounding, visual dialogue, and visual question answering. Additionally, we employed other models to assess performance on certain subtasks. We optimized our models and successfully applied them to these different tasks},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jiangkuo-etal-2024-chinese">
<titleInfo>
<title>Chinese Vision-Language Understanding Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jiangkuo</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Linwei</namePart>
<namePart type="family">Zheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kehai</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuefeng</namePart>
<namePart type="family">Bai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 23rd Chinese National Conference on Computational Linguistics (Volume 3: Evaluations)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hongfei</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hongye</namePart>
<namePart type="family">Tan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bin</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Chinese Information Processing Society of China</publisher>
<place>
<placeTerm type="text">Taiyuan, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper introduces our systems submitted for the Chinese Vision-Language Understanding Evaluation task at the 23rd Chinese Computational Linguistics Conference. In this competition, we utilized X2-VLM and CCLM models to participate in various subtasks such as image-text retrieval, visual grounding, visual dialogue, and visual question answering. Additionally, we employed other models to assess performance on certain subtasks. We optimized our models and successfully applied them to these different tasks</abstract>
<identifier type="citekey">jiangkuo-etal-2024-chinese</identifier>
<location>
<url>https://aclanthology.org/2024.ccl-3.41/</url>
</location>
<part>
<date>2024-07</date>
<extent unit="page">
<start>363</start>
<end>373</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Chinese Vision-Language Understanding Evaluation
%A Wang, Jiangkuo
%A Zheng, Linwei
%A Chen, Kehai
%A Bai, Xuefeng
%A Zhang, Min
%Y Lin, Hongfei
%Y Tan, Hongye
%Y Li, Bin
%S Proceedings of the 23rd Chinese National Conference on Computational Linguistics (Volume 3: Evaluations)
%D 2024
%8 July
%I Chinese Information Processing Society of China
%C Taiyuan, China
%G eng
%F jiangkuo-etal-2024-chinese
%X This paper introduces our systems submitted for the Chinese Vision-Language Understanding Evaluation task at the 23rd Chinese Computational Linguistics Conference. In this competition, we utilized X2-VLM and CCLM models to participate in various subtasks such as image-text retrieval, visual grounding, visual dialogue, and visual question answering. Additionally, we employed other models to assess performance on certain subtasks. We optimized our models and successfully applied them to these different tasks
%U https://aclanthology.org/2024.ccl-3.41/
%P 363-373
Markdown (Informal)
[Chinese Vision-Language Understanding Evaluation](https://aclanthology.org/2024.ccl-3.41/) (Wang et al., CCL 2024)
ACL
- Jiangkuo Wang, Linwei Zheng, Kehai Chen, Xuefeng Bai, and Min Zhang. 2024. Chinese Vision-Language Understanding Evaluation. In Proceedings of the 23rd Chinese National Conference on Computational Linguistics (Volume 3: Evaluations), pages 363–373, Taiyuan, China. Chinese Information Processing Society of China.