@inproceedings{daohuan-etal-2025-system,
title = "System Report for {CCL}25-Eval Task 4: Prompting, Scheduling, and Arbitration Strategies for {C}hinese Factivity Inference",
author = "Daohuan, Liu and
Lun, Xia and
Zhang, Yuxuan and
Yang, Xinyu and
Kong, Fanzhen",
editor = "Lin, Hongfei and
Li, Bin and
Tan, Hongye",
booktitle = "Proceedings of the 24th {C}hina National Conference on Computational Linguistics ({CCL} 2025)",
month = aug,
year = "2025",
address = "Jinan, China",
publisher = "Chinese Information Processing Society of China",
url = "https://aclanthology.org/2025.ccl-2.17/",
pages = "146--151",
abstract = "This report presents the methodology and findings of prompting large language models (LLMs) for Chinese Factivity Inference (FI). We evaluated five LLMs, among which DeepSeek-R1 demonstrated the best overall performance. A combination of Chain-of-Thought (CoT), few-shot, and system-level instructions was used for final prompting. Additionally, we introduced a pairwise task scheduling strategy and a multi-agent disagreement arbitration mechanism to further enhance inference quality. Experimental results show that the integration of prompting, scheduling, and arbitration strategies significantly improves performance, with DeepSeek-R1 achieving 91.7{\%} overall accuracy on the evaluation set. The report also highlights findings regarding LLM behavior on FI tasks and outlines potential directions for future improvement."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="daohuan-etal-2025-system">
<titleInfo>
<title>System Report for CCL25-Eval Task 4: Prompting, Scheduling, and Arbitration Strategies for Chinese Factivity Inference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Liu</namePart>
<namePart type="family">Daohuan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xia</namePart>
<namePart type="family">Lun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuxuan</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xinyu</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fanzhen</namePart>
<namePart type="family">Kong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 24th China National Conference on Computational Linguistics (CCL 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hongfei</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bin</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hongye</namePart>
<namePart type="family">Tan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Chinese Information Processing Society of China</publisher>
<place>
<placeTerm type="text">Jinan, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This report presents the methodology and findings of prompting large language models (LLMs) for Chinese Factivity Inference (FI). We evaluated five LLMs, among which DeepSeek-R1 demonstrated the best overall performance. A combination of Chain-of-Thought (CoT), few-shot, and system-level instructions was used for final prompting. Additionally, we introduced a pairwise task scheduling strategy and a multi-agent disagreement arbitration mechanism to further enhance inference quality. Experimental results show that the integration of prompting, scheduling, and arbitration strategies significantly improves performance, with DeepSeek-R1 achieving 91.7% overall accuracy on the evaluation set. The report also highlights findings regarding LLM behavior on FI tasks and outlines potential directions for future improvement.</abstract>
<identifier type="citekey">daohuan-etal-2025-system</identifier>
<location>
<url>https://aclanthology.org/2025.ccl-2.17/</url>
</location>
<part>
<date>2025-08</date>
<extent unit="page">
<start>146</start>
<end>151</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T System Report for CCL25-Eval Task 4: Prompting, Scheduling, and Arbitration Strategies for Chinese Factivity Inference
%A Daohuan, Liu
%A Lun, Xia
%A Zhang, Yuxuan
%A Yang, Xinyu
%A Kong, Fanzhen
%Y Lin, Hongfei
%Y Li, Bin
%Y Tan, Hongye
%S Proceedings of the 24th China National Conference on Computational Linguistics (CCL 2025)
%D 2025
%8 August
%I Chinese Information Processing Society of China
%C Jinan, China
%F daohuan-etal-2025-system
%X This report presents the methodology and findings of prompting large language models (LLMs) for Chinese Factivity Inference (FI). We evaluated five LLMs, among which DeepSeek-R1 demonstrated the best overall performance. A combination of Chain-of-Thought (CoT), few-shot, and system-level instructions was used for final prompting. Additionally, we introduced a pairwise task scheduling strategy and a multi-agent disagreement arbitration mechanism to further enhance inference quality. Experimental results show that the integration of prompting, scheduling, and arbitration strategies significantly improves performance, with DeepSeek-R1 achieving 91.7% overall accuracy on the evaluation set. The report also highlights findings regarding LLM behavior on FI tasks and outlines potential directions for future improvement.
%U https://aclanthology.org/2025.ccl-2.17/
%P 146-151
Markdown (Informal)
[System Report for CCL25-Eval Task 4: Prompting, Scheduling, and Arbitration Strategies for Chinese Factivity Inference](https://aclanthology.org/2025.ccl-2.17/) (Daohuan et al., CCL 2025)
ACL