@inproceedings{lu-etal-2025-fipo,
title = "{FIPO}: Free-form Instruction-oriented Prompt Optimization with Preference Dataset and Modular Fine-tuning Schema",
author = "Lu, Junru and
An, Siyu and
Zhang, Min and
He, Yulan and
Yin, Di and
Sun, Xing",
editor = "Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Eugenio, Barbara Di and
Schockaert, Steven",
booktitle = "Proceedings of the 31st International Conference on Computational Linguistics",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.coling-main.731/",
pages = "11029--11047",
abstract = "When carefully optimized by human experts, naive prompts can significantly enhance the task performance of large language models (LLMs). However, such expert-driven prompt optimizations are resource-intensive. To address this, some studies have proposed Automatic Prompt Optimization (APO), which refines naive prompts according to task outputs from in-box testing models, utilizing advanced LLMs (e.g., GPT-4) in an ad-hoc way. Although effective, current approaches face challenges in generalization and privacy risks. To overcome these limitations, we have developed the first large-scale Prompt Optimization Preference (POP) dataset, fine-tuned offline local LLM-based optimizers, and conducted fairly evaluations across various downstream models. Our method, named Free-from Instruction-oriented Prompt Optimization (FIPO), allows precise optimization of the core task instructions in naive prompts in a model-agnostic manner. FIPO uses a modular APO template that dynamically incorporates the naive task instructions, optional instruction responses, and optional ground truth to produce refined prompts. The POP dataset is meticulously constructed using advanced LLMs, undergoing rigorous cross-validation by human experts and analytical models. By leveraging insights from this dataset, along with Tulu2 models and diverse fine-tuning strategies, we validate the efficacy of the FIPO framework across five public benchmarks and six testing models. Our dataset and codes are available at: https://github.com/LuJunru/FIPO{\_}Project."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lu-etal-2025-fipo">
<titleInfo>
<title>FIPO: Free-form Instruction-oriented Prompt Optimization with Preference Dataset and Modular Fine-tuning Schema</title>
</titleInfo>
<name type="personal">
<namePart type="given">Junru</namePart>
<namePart type="family">Lu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Siyu</namePart>
<namePart type="family">An</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yulan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Di</namePart>
<namePart type="family">Yin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xing</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 31st International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Apidianaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hend</namePart>
<namePart type="family">Al-Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="given">Di</namePart>
<namePart type="family">Eugenio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Schockaert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>When carefully optimized by human experts, naive prompts can significantly enhance the task performance of large language models (LLMs). However, such expert-driven prompt optimizations are resource-intensive. To address this, some studies have proposed Automatic Prompt Optimization (APO), which refines naive prompts according to task outputs from in-box testing models, utilizing advanced LLMs (e.g., GPT-4) in an ad-hoc way. Although effective, current approaches face challenges in generalization and pose privacy risks. To overcome these limitations, we have developed the first large-scale Prompt Optimization Preference (POP) dataset, fine-tuned offline local LLM-based optimizers, and conducted fair evaluations across various downstream models. Our method, named Free-form Instruction-oriented Prompt Optimization (FIPO), allows precise optimization of the core task instructions in naive prompts in a model-agnostic manner. FIPO uses a modular APO template that dynamically incorporates the naive task instructions, optional instruction responses, and optional ground truth to produce refined prompts. The POP dataset is meticulously constructed using advanced LLMs, undergoing rigorous cross-validation by human experts and analytical models. By leveraging insights from this dataset, along with Tulu2 models and diverse fine-tuning strategies, we validate the efficacy of the FIPO framework across five public benchmarks and six testing models. Our dataset and code are available at: https://github.com/LuJunru/FIPO_Project.</abstract>
<identifier type="citekey">lu-etal-2025-fipo</identifier>
<location>
<url>https://aclanthology.org/2025.coling-main.731/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>11029</start>
<end>11047</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T FIPO: Free-form Instruction-oriented Prompt Optimization with Preference Dataset and Modular Fine-tuning Schema
%A Lu, Junru
%A An, Siyu
%A Zhang, Min
%A He, Yulan
%A Yin, Di
%A Sun, Xing
%Y Rambow, Owen
%Y Wanner, Leo
%Y Apidianaki, Marianna
%Y Al-Khalifa, Hend
%Y Eugenio, Barbara Di
%Y Schockaert, Steven
%S Proceedings of the 31st International Conference on Computational Linguistics
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F lu-etal-2025-fipo
%X When carefully optimized by human experts, naive prompts can significantly enhance the task performance of large language models (LLMs). However, such expert-driven prompt optimizations are resource-intensive. To address this, some studies have proposed Automatic Prompt Optimization (APO), which refines naive prompts according to task outputs from in-box testing models, utilizing advanced LLMs (e.g., GPT-4) in an ad-hoc way. Although effective, current approaches face challenges in generalization and pose privacy risks. To overcome these limitations, we have developed the first large-scale Prompt Optimization Preference (POP) dataset, fine-tuned offline local LLM-based optimizers, and conducted fair evaluations across various downstream models. Our method, named Free-form Instruction-oriented Prompt Optimization (FIPO), allows precise optimization of the core task instructions in naive prompts in a model-agnostic manner. FIPO uses a modular APO template that dynamically incorporates the naive task instructions, optional instruction responses, and optional ground truth to produce refined prompts. The POP dataset is meticulously constructed using advanced LLMs, undergoing rigorous cross-validation by human experts and analytical models. By leveraging insights from this dataset, along with Tulu2 models and diverse fine-tuning strategies, we validate the efficacy of the FIPO framework across five public benchmarks and six testing models. Our dataset and code are available at: https://github.com/LuJunru/FIPO_Project.
%U https://aclanthology.org/2025.coling-main.731/
%P 11029-11047
Markdown (Informal)
[FIPO: Free-form Instruction-oriented Prompt Optimization with Preference Dataset and Modular Fine-tuning Schema](https://aclanthology.org/2025.coling-main.731/) (Lu et al., COLING 2025)
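The abstract describes a modular APO template that combines the naive task instruction with optional instruction responses and optional ground truth to produce a refined prompt. As a rough illustration only, the Python sketch below shows one plausible way such a template could be assembled; the function name, field labels, and wording are hypothetical and are not taken from the FIPO repository linked above.

```python
# Hypothetical sketch (not from the FIPO repository): assembling a modular
# prompt-optimization template where the naive instruction is required and
# the model response / ground truth are optional modules.
from typing import Optional


def build_apo_prompt(
    naive_instruction: str,
    response: Optional[str] = None,
    ground_truth: Optional[str] = None,
) -> str:
    """Compose an optimizer prompt from the available optional modules."""
    parts = [
        "Rewrite the following task instruction so that a downstream model "
        "can solve the task more reliably.",
        f"### Naive instruction\n{naive_instruction}",
    ]
    if response is not None:
        parts.append(f"### Model response to the naive instruction\n{response}")
    if ground_truth is not None:
        parts.append(f"### Ground-truth answer\n{ground_truth}")
    parts.append("### Optimized instruction")
    return "\n\n".join(parts)


if __name__ == "__main__":
    # Example: both optional modules are present, exposing the mismatch
    # between the model's answer and the ground truth to the optimizer.
    print(build_apo_prompt(
        "Classify the sentiment of the review.",
        response="Positive",
        ground_truth="Negative",
    ))
```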