% Conference-paper record (Findings of ACL 2026). Brace groups inside title
% and booktitle shield whole words from a style's sentence-casing.
@inproceedings{shen-etal-2026-new,
  title     = {New Compendium of a Myriad of Plants: A New Dataset Describing {Ancient} {Chinese} Plants},
  author    = {Shen, Xiaobin and
               Wang, Zhongqing and
               Li, Shichen and
               Huang, Chu-Ren and
               Zhou, Guodong},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.findings-acl.73/},
  pages     = {1483--1498},
  isbn      = {979-8-89176-395-1},
  abstract  = {In ancient China, a variety of datasets depicted humanistic scenes, geographical features, and plants. However, these datasets, compiled long ago, often contain errors, lack comprehensiveness, and are inconsistent with modern realities. To meet current demands, we aim to expand and improve ancient datasets using large language model. Focusing on the Great Compendium of Myriad Flowers, an invaluable ancient plants dataset, we gather information on numerous previously excluded plants, carefully select and organize classical Chinese poetry and prose, and construct a comprehensive botanical encyclopedia knowledge system. Additionally, we collect ancient paintings and modern photographs of plants to enrich the dataset. Furthermore, we propose a novel multi-modal plant classification model designed to integrate multi-modal information from both classical and contemporary sources, enabling the extraction of plant-related information from classical Chinese poetry and prose. Extensive experiments demonstrate the importance of the proposed new ancient plants dataset, and also indicate the effectiveness of our proposed multi-modal plant classification model.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey shen-etal-2026-new. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="shen-etal-2026-new">
    <titleInfo>
      <title>New Compendium of a Myriad of Plants: A New Dataset Describing Ancient Chinese Plants</title>
    </titleInfo>
    <!-- Authors of the paper. -->
    <name type="personal">
      <namePart type="given">Xiaobin</namePart>
      <namePart type="family">Shen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zhongqing</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shichen</namePart>
      <namePart type="family">Li</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chu-Ren</namePart>
      <namePart type="family">Huang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Guodong</namePart>
      <namePart type="family">Zhou</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors and publisher details. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2026</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Liakata</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Viviane</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">Moreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiajun</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-395-1</identifier>
    </relatedItem>
    <abstract>In ancient China, a variety of datasets depicted humanistic scenes, geographical features, and plants. However, these datasets, compiled long ago, often contain errors, lack comprehensiveness, and are inconsistent with modern realities. To meet current demands, we aim to expand and improve ancient datasets using large language model. Focusing on the Great Compendium of Myriad Flowers, an invaluable ancient plants dataset, we gather information on numerous previously excluded plants, carefully select and organize classical Chinese poetry and prose, and construct a comprehensive botanical encyclopedia knowledge system. Additionally, we collect ancient paintings and modern photographs of plants to enrich the dataset. Furthermore, we propose a novel multi-modal plant classification model designed to integrate multi-modal information from both classical and contemporary sources, enabling the extraction of plant-related information from classical Chinese poetry and prose. Extensive experiments demonstrate the importance of the proposed new ancient plants dataset, and also indicate the effectiveness of our proposed multi-modal plant classification model.</abstract>
    <identifier type="citekey">shen-etal-2026-new</identifier>
    <location>
      <url>https://aclanthology.org/2026.findings-acl.73/</url>
    </location>
    <!-- Page extent of the paper within the host volume. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>1483</start>
        <end>1498</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T New Compendium of a Myriad of Plants: A New Dataset Describing Ancient Chinese Plants
%A Shen, Xiaobin
%A Wang, Zhongqing
%A Li, Shichen
%A Huang, Chu-Ren
%A Zhou, Guodong
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F shen-etal-2026-new
%X In ancient China, a variety of datasets depicted humanistic scenes, geographical features, and plants. However, these datasets, compiled long ago, often contain errors, lack comprehensiveness, and are inconsistent with modern realities. To meet current demands, we aim to expand and improve ancient datasets using large language model. Focusing on the Great Compendium of Myriad Flowers, an invaluable ancient plants dataset, we gather information on numerous previously excluded plants, carefully select and organize classical Chinese poetry and prose, and construct a comprehensive botanical encyclopedia knowledge system. Additionally, we collect ancient paintings and modern photographs of plants to enrich the dataset. Furthermore, we propose a novel multi-modal plant classification model designed to integrate multi-modal information from both classical and contemporary sources, enabling the extraction of plant-related information from classical Chinese poetry and prose. Extensive experiments demonstrate the importance of the proposed new ancient plants dataset, and also indicate the effectiveness of our proposed multi-modal plant classification model.
%U https://aclanthology.org/2026.findings-acl.73/
%P 1483-1498
Markdown (Informal)
[New Compendium of a Myriad of Plants: A New Dataset Describing Ancient Chinese Plants](https://aclanthology.org/2026.findings-acl.73/) (Shen et al., Findings 2026)
ACL