@inproceedings{jiangkuo-etal-2024-chinese,
title = "{C}hinese Vision-Language Understanding Evaluation",
author = "Jiangkuo, Wang and
Linwei, Zheng and
Kehai, Chen and
Xuefeng, Bai and
Min, Zhang",
editor = "Lin, Hongfei and
Tan, Hongye and
Li, Bin",
booktitle = "Proceedings of the 23rd Chinese National Conference on Computational Linguistics (Volume 3: Evaluations)",
month = jul,
year = "2024",
address = "Taiyuan, China",
publisher = "Chinese Information Processing Society of China",
url = "https://aclanthology.org/2024.ccl-3.41/",
pages = "363--373",
language = "eng",
abstract = "{\textquotedblleft}This paper introduces our systems submitted for the Chinese Vision-Language Understanding Evaluation task at the 23rd Chinese Computational Linguistics Conference. In this competition, we utilized X2-VLM and CCLM models to participate in various subtasks such as image-text retrieval, visual grounding, visual dialogue, and visual question answering. Additionally, we employed other models to assess performance on certain subtasks. We optimized our models and successfully applied them to these different tasks{\textquotedblright}"
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jiangkuo-etal-2024-chinese">
<titleInfo>
<title>Chinese Vision-Language Understanding Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wang</namePart>
<namePart type="family">Jiangkuo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zheng</namePart>
<namePart type="family">Linwei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chen</namePart>
<namePart type="family">Kehai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bai</namePart>
<namePart type="family">Xuefeng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhang</namePart>
<namePart type="family">Min</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 23rd Chinese National Conference on Computational Linguistics (Volume 3: Evaluations)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hongfei</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hongye</namePart>
<namePart type="family">Tan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bin</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Chinese Information Processing Society of China</publisher>
<place>
<placeTerm type="text">Taiyuan, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>“This paper introduces our systems submitted for the Chinese Vision-Language Understanding Evaluation task at the 23rd Chinese Computational Linguistics Conference. In this competition, we utilized X2-VLM and CCLM models to participate in various subtasks such as image-text retrieval, visual grounding, visual dialogue, and visual question answering. Additionally, we employed other models to assess performance on certain subtasks. We optimized our models and successfully applied them to these different tasks”</abstract>
<identifier type="citekey">jiangkuo-etal-2024-chinese</identifier>
<location>
<url>https://aclanthology.org/2024.ccl-3.41/</url>
</location>
<part>
<date>2024-07</date>
<extent unit="page">
<start>363</start>
<end>373</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Chinese Vision-Language Understanding Evaluation
%A Jiangkuo, Wang
%A Linwei, Zheng
%A Kehai, Chen
%A Xuefeng, Bai
%A Min, Zhang
%Y Lin, Hongfei
%Y Tan, Hongye
%Y Li, Bin
%S Proceedings of the 23rd Chinese National Conference on Computational Linguistics (Volume 3: Evaluations)
%D 2024
%8 July
%I Chinese Information Processing Society of China
%C Taiyuan, China
%G eng
%F jiangkuo-etal-2024-chinese
%X “This paper introduces our systems submitted for the Chinese Vision-Language Understanding Evaluation task at the 23rd Chinese Computational Linguistics Conference. In this competition, we utilized X2-VLM and CCLM models to participate in various subtasks such as image-text retrieval, visual grounding, visual dialogue, and visual question answering. Additionally, we employed other models to assess performance on certain subtasks. We optimized our models and successfully applied them to these different tasks”
%U https://aclanthology.org/2024.ccl-3.41/
%P 363-373
Markdown (Informal)
[Chinese Vision-Language Understanding Evaluation](https://aclanthology.org/2024.ccl-3.41/) (Jiangkuo et al., CCL 2024)
ACL
- Wang Jiangkuo, Zheng Linwei, Chen Kehai, Bai Xuefeng, and Zhang Min. 2024. Chinese Vision-Language Understanding Evaluation. In Proceedings of the 23rd Chinese National Conference on Computational Linguistics (Volume 3: Evaluations), pages 363–373, Taiyuan, China. Chinese Information Processing Society of China.