@inproceedings{wang-etal-2025-tag,
title = "Tag-Evol: Achieving Efficient Instruction Evolving via Tag Injection",
author = "Wang, Yixuan and
Zhou, Shiqi and
Guo, Chuanzhe and
Zhu, Qingfu",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.409/",
doi = "10.18653/v1/2025.findings-acl.409",
pages = "7856--7869",
ISBN = "979-8-89176-256-5",
abstract = "Evol-Instruct has made significant improvements as a data synthesis method in several areas. Existing methods typically rely on a fixed set of strategies to evolve, which require manual design and are monolithic in form. In addition, iterative evolution also makes the acquisition of hard samples expensive. In view of this, we propose the Tag-Evol framework, a more diverse and efficient instruction evolving method. Specifically, Tag-Evol uses diverse and specific knowledge tags as strategies to achieve controlled evolution by injecting different combinations of tags into the original instructions. Experiments with multiple backbones in mathematical and code domain benchmarks show that the proposed method generates significantly better evolved data than other methods. Furthermore, we conduct a thorough analysis of the evolved data, demonstrating that Tag-Evol is not only efficient but also generates more diverse and challenging data."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wang-etal-2025-tag">
<titleInfo>
<title>Tag-Evol: Achieving Efficient Instruction Evolving via Tag Injection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yixuan</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shiqi</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chuanzhe</namePart>
<namePart type="family">Guo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qingfu</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-256-5</identifier>
</relatedItem>
<abstract>Evol-Instruct has made significant improvements as a data synthesis method in several areas. Existing methods typically rely on a fixed set of strategies to evolve, which require manual design and are monolithic in form. In addition, iterative evolution also makes the acquisition of hard samples expensive. In view of this, we propose the Tag-Evol framework, a more diverse and efficient instruction evolving method. Specifically, Tag-Evol uses diverse and specific knowledge tags as strategies to achieve controlled evolution by injecting different combinations of tags into the original instructions. Experiments with multiple backbones in mathematical and code domain benchmarks show that the proposed method generates significantly better evolved data than other methods. Furthermore, we conduct a thorough analysis of the evolved data, demonstrating that Tag-Evol is not only efficient but also generates more diverse and challenging data.</abstract>
<identifier type="citekey">wang-etal-2025-tag</identifier>
<identifier type="doi">10.18653/v1/2025.findings-acl.409</identifier>
<location>
<url>https://aclanthology.org/2025.findings-acl.409/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>7856</start>
<end>7869</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Tag-Evol: Achieving Efficient Instruction Evolving via Tag Injection
%A Wang, Yixuan
%A Zhou, Shiqi
%A Guo, Chuanzhe
%A Zhu, Qingfu
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F wang-etal-2025-tag
%X Evol-Instruct has made significant improvements as a data synthesis method in several areas. Existing methods typically rely on a fixed set of strategies to evolve, which require manual design and are monolithic in form. In addition, iterative evolution also makes the acquisition of hard samples expensive. In view of this, we propose the Tag-Evol framework, a more diverse and efficient instruction evolving method. Specifically, Tag-Evol uses diverse and specific knowledge tags as strategies to achieve controlled evolution by injecting different combinations of tags into the original instructions. Experiments with multiple backbones in mathematical and code domain benchmarks show that the proposed method generates significantly better evolved data than other methods. Furthermore, we conduct a thorough analysis of the evolved data, demonstrating that Tag-Evol is not only efficient but also generates more diverse and challenging data.
%R 10.18653/v1/2025.findings-acl.409
%U https://aclanthology.org/2025.findings-acl.409/
%U https://doi.org/10.18653/v1/2025.findings-acl.409
%P 7856-7869
Markdown (Informal)
[Tag-Evol: Achieving Efficient Instruction Evolving via Tag Injection](https://aclanthology.org/2025.findings-acl.409/) (Wang et al., Findings 2025)
ACL