@inproceedings{boateng-etal-2025-concept,
title = "Concept Distillation from Strong to Weak Models via Hypotheses-to-Theories Prompting",
author = "Boateng, Emmanuel Aboah and
Becker, Cassiano O and
Asghar, Nabiha and
Walia, Kabir and
Srinivasan, Ashwin and
Nosakhare, Ehi and
Srinivasan, Soundararajan and
Dibia, Victor",
editor = "Chen, Weizhu and
Yang, Yi and
Kachuee, Mohammad and
Fu, Xue-Yong",
booktitle = "Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 3: Industry Track)",
month = apr,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.naacl-industry.52/",
doi = "10.18653/v1/2025.naacl-industry.52",
pages = "638--654",
ISBN = "979-8-89176-194-0",
abstract = "Hand-crafting high quality prompts to optimize the performance of language models is a complicated and labor-intensive process. Furthermore, when migrating to newer, smaller, or weaker models (possibly due to latency or cost gains), prompts need to be updated to re-optimize the task performance. We propose Concept Distillation (CD), an automatic prompt optimization technique for enhancing weaker models on complex tasks. CD involves: (1) collecting mistakes made by weak models with a base prompt (initialization), (2) using a strong model to generate reasons for these mistakes and create rules/concepts for weak models (induction), and (3) filtering these rules based on validation set performance and integrating them into the base prompt (deduction/verification). We evaluated CD on NL2Code and mathematical reasoning tasks, observing significant performance boosts for small and weaker language models. Notably, Mistral-7B{'}s accuracy on Multi-Arith increased by 20{\%}, and Phi-3-mini-3.8B{'}s accuracy on HumanEval rose by 34{\%}. Compared to other automated methods, CD offers an effective, cost-efficient strategy for improving weak models' performance on complex tasks and enables seamless workload migration across different language models without compromising performance."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="boateng-etal-2025-concept">
<titleInfo>
<title>Concept Distillation from Strong to Weak Models via Hypotheses-to-Theories Prompting</title>
</titleInfo>
<name type="personal">
<namePart type="given">Emmanuel</namePart>
<namePart type="given">Aboah</namePart>
<namePart type="family">Boateng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cassiano</namePart>
<namePart type="given">O</namePart>
<namePart type="family">Becker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nabiha</namePart>
<namePart type="family">Asghar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kabir</namePart>
<namePart type="family">Walia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ashwin</namePart>
<namePart type="family">Srinivasan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ehi</namePart>
<namePart type="family">Nosakhare</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Soundararajan</namePart>
<namePart type="family">Srinivasan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Victor</namePart>
<namePart type="family">Dibia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 3: Industry Track)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Weizhu</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yi</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="family">Kachuee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xue-Yong</namePart>
<namePart type="family">Fu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-194-0</identifier>
</relatedItem>
<abstract>Hand-crafting high quality prompts to optimize the performance of language models is a complicated and labor-intensive process. Furthermore, when migrating to newer, smaller, or weaker models (possibly due to latency or cost gains), prompts need to be updated to re-optimize the task performance. We propose Concept Distillation (CD), an automatic prompt optimization technique for enhancing weaker models on complex tasks. CD involves: (1) collecting mistakes made by weak models with a base prompt (initialization), (2) using a strong model to generate reasons for these mistakes and create rules/concepts for weak models (induction), and (3) filtering these rules based on validation set performance and integrating them into the base prompt (deduction/verification). We evaluated CD on NL2Code and mathematical reasoning tasks, observing significant performance boosts for small and weaker language models. Notably, Mistral-7B’s accuracy on Multi-Arith increased by 20%, and Phi-3-mini-3.8B’s accuracy on HumanEval rose by 34%. Compared to other automated methods, CD offers an effective, cost-efficient strategy for improving weak models’ performance on complex tasks and enables seamless workload migration across different language models without compromising performance.</abstract>
<identifier type="citekey">boateng-etal-2025-concept</identifier>
<identifier type="doi">10.18653/v1/2025.naacl-industry.52</identifier>
<location>
<url>https://aclanthology.org/2025.naacl-industry.52/</url>
</location>
<part>
<date>2025-04</date>
<extent unit="page">
<start>638</start>
<end>654</end>
</extent>
</part>
</mods>
</modsCollection>

%0 Conference Proceedings
%T Concept Distillation from Strong to Weak Models via Hypotheses-to-Theories Prompting
%A Boateng, Emmanuel Aboah
%A Becker, Cassiano O.
%A Asghar, Nabiha
%A Walia, Kabir
%A Srinivasan, Ashwin
%A Nosakhare, Ehi
%A Srinivasan, Soundararajan
%A Dibia, Victor
%Y Chen, Weizhu
%Y Yang, Yi
%Y Kachuee, Mohammad
%Y Fu, Xue-Yong
%S Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 3: Industry Track)
%D 2025
%8 April
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-194-0
%F boateng-etal-2025-concept
%X Hand-crafting high quality prompts to optimize the performance of language models is a complicated and labor-intensive process. Furthermore, when migrating to newer, smaller, or weaker models (possibly due to latency or cost gains), prompts need to be updated to re-optimize the task performance. We propose Concept Distillation (CD), an automatic prompt optimization technique for enhancing weaker models on complex tasks. CD involves: (1) collecting mistakes made by weak models with a base prompt (initialization), (2) using a strong model to generate reasons for these mistakes and create rules/concepts for weak models (induction), and (3) filtering these rules based on validation set performance and integrating them into the base prompt (deduction/verification). We evaluated CD on NL2Code and mathematical reasoning tasks, observing significant performance boosts for small and weaker language models. Notably, Mistral-7B’s accuracy on Multi-Arith increased by 20%, and Phi-3-mini-3.8B’s accuracy on HumanEval rose by 34%. Compared to other automated methods, CD offers an effective, cost-efficient strategy for improving weak models’ performance on complex tasks and enables seamless workload migration across different language models without compromising performance.
%R 10.18653/v1/2025.naacl-industry.52
%U https://aclanthology.org/2025.naacl-industry.52/
%U https://doi.org/10.18653/v1/2025.naacl-industry.52
%P 638-654

Markdown (Informal):
[Concept Distillation from Strong to Weak Models via Hypotheses-to-Theories Prompting](https://aclanthology.org/2025.naacl-industry.52/) (Boateng et al., NAACL 2025)

ACL:
Emmanuel Aboah Boateng, Cassiano O Becker, Nabiha Asghar, Kabir Walia, Ashwin Srinivasan, Ehi Nosakhare, Soundararajan Srinivasan, and Victor Dibia. 2025. Concept Distillation from Strong to Weak Models via Hypotheses-to-Theories Prompting. In Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 3: Industry Track), pages 638–654, Albuquerque, New Mexico. Association for Computational Linguistics.
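
The abstract above outlines the three Concept Distillation stages (initialization, induction, deduction/verification). As a rough illustration only, and not the authors' implementation, a minimal sketch of that loop might look like the following; the model handles, dataset splits, and helper methods (`weak_model`, `strong_model`, `generate`, `check`, etc.) are hypothetical placeholders.

```python
# Hedged sketch of the Concept Distillation (CD) loop described in the abstract.
# All objects below (weak_model, strong_model, train_set, val_set, example fields)
# are assumed placeholders, not the paper's actual code or API.

def evaluate(model, prompt, dataset):
    """Return (accuracy, list of (example, wrong_answer)) for a prompt."""
    mistakes, correct = [], 0
    for example in dataset:
        answer = model.generate(prompt + "\n" + example.question)
        if example.check(answer):
            correct += 1
        else:
            mistakes.append((example, answer))
    return correct / len(dataset), mistakes


def concept_distillation(weak_model, strong_model, base_prompt, train_set, val_set):
    # (1) Initialization: collect the weak model's mistakes under the base prompt.
    _, mistakes = evaluate(weak_model, base_prompt, train_set)

    # (2) Induction: ask the strong model to explain each mistake and propose
    # a reusable rule/concept for the weak model.
    rules = []
    for example, wrong_answer in mistakes:
        rule = strong_model.generate(
            f"Question: {example.question}\n"
            f"Wrong answer: {wrong_answer}\n"
            f"Correct answer: {example.answer}\n"
            "Explain the error and state one general rule that would avoid it."
        )
        rules.append(rule)

    # (3) Deduction/verification: keep only rules that improve validation accuracy,
    # folding the survivors into the base prompt.
    best_prompt = base_prompt
    best_acc, _ = evaluate(weak_model, best_prompt, val_set)
    for rule in rules:
        candidate = best_prompt + "\nRule: " + rule
        acc, _ = evaluate(weak_model, candidate, val_set)
        if acc > best_acc:
            best_prompt, best_acc = candidate, acc
    return best_prompt
```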