@inproceedings{liu-etal-2024-making,
title = "Making Large Language Models Better Reasoners with Orchestrated Streaming Experiences",
author = "Liu, Xiangyang and
He, Junliang and
Qiu, Xipeng",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-main.48",
doi = "10.18653/v1/2024.emnlp-main.48",
pages = "817--838",
abstract = "Large language models (LLMs) can perform complex reasoning by generating intermediate reasoning steps using chain-of-thought prompting under zero-shot or few-shot settings. However, zero-shot prompting always encounters low performance, and the superior performance of few-shot prompting hinges on the manual-crafting of task-specific demonstrations one by one. In this paper, we present **RoSE** (**R**easoning with **O**rchestrated **S**treaming **E**xperiences), a general framework for solving reasoning tasks that can self-improve as it answers various reasoning questions. To enable RoSE, we describe an architecture that extends an LLM to store all answered reasoning questions and their reasoning steps in a streaming experience pool and orchestrate helpful questions from the pool to assist itself in answering new questions. To set up a question-aware orchestration mechanism, RoSE first calculates the similarity of each question in the pool with the question to be answered. Since the solution to each question in the experience pool is not always correct, RoSE will sort the questions according to their similarity with the question to be answered, and then uniformly divide them into multiple buckets. It finally extracts one question from each bucket to make the extracted questions more diverse. To make the extracted questions help RoSE answer new questions as much as possible, we introduce two other attributes of uncertainty and complexity for each question. RoSE will preferentially select the questions with low uncertainty and high complexity from each bucket. We evaluate the versatility of RoSE in various complex reasoning tasks and LLMs, such as arithmetic and commonsense reasoning, and find that it can achieve excellent performance without any labeled data and pre-set unlabeled data.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="liu-etal-2024-making">
<titleInfo>
<title>Making Large Language Models Better Reasoners with Orchestrated Streaming Experiences</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xiangyang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junliang</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xipeng</namePart>
<namePart type="family">Qiu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large language models (LLMs) can perform complex reasoning by generating intermediate reasoning steps using chain-of-thought prompting under zero-shot or few-shot settings. However, zero-shot prompting always encounters low performance, and the superior performance of few-shot prompting hinges on the manual-crafting of task-specific demonstrations one by one. In this paper, we present **RoSE** (**R**easoning with **O**rchestrated **S**treaming **E**xperiences), a general framework for solving reasoning tasks that can self-improve as it answers various reasoning questions. To enable RoSE, we describe an architecture that extends an LLM to store all answered reasoning questions and their reasoning steps in a streaming experience pool and orchestrate helpful questions from the pool to assist itself in answering new questions. To set up a question-aware orchestration mechanism, RoSE first calculates the similarity of each question in the pool with the question to be answered. Since the solution to each question in the experience pool is not always correct, RoSE will sort the questions according to their similarity with the question to be answered, and then uniformly divide them into multiple buckets. It finally extracts one question from each bucket to make the extracted questions more diverse. To make the extracted questions help RoSE answer new questions as much as possible, we introduce two other attributes of uncertainty and complexity for each question. RoSE will preferentially select the questions with low uncertainty and high complexity from each bucket. We evaluate the versatility of RoSE in various complex reasoning tasks and LLMs, such as arithmetic and commonsense reasoning, and find that it can achieve excellent performance without any labeled data and pre-set unlabeled data.</abstract>
<identifier type="citekey">liu-etal-2024-making</identifier>
<identifier type="doi">10.18653/v1/2024.emnlp-main.48</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.48</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>817</start>
<end>838</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Making Large Language Models Better Reasoners with Orchestrated Streaming Experiences
%A Liu, Xiangyang
%A He, Junliang
%A Qiu, Xipeng
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F liu-etal-2024-making
%X Large language models (LLMs) can perform complex reasoning by generating intermediate reasoning steps using chain-of-thought prompting under zero-shot or few-shot settings. However, zero-shot prompting always encounters low performance, and the superior performance of few-shot prompting hinges on the manual-crafting of task-specific demonstrations one by one. In this paper, we present **RoSE** (**R**easoning with **O**rchestrated **S**treaming **E**xperiences), a general framework for solving reasoning tasks that can self-improve as it answers various reasoning questions. To enable RoSE, we describe an architecture that extends an LLM to store all answered reasoning questions and their reasoning steps in a streaming experience pool and orchestrate helpful questions from the pool to assist itself in answering new questions. To set up a question-aware orchestration mechanism, RoSE first calculates the similarity of each question in the pool with the question to be answered. Since the solution to each question in the experience pool is not always correct, RoSE will sort the questions according to their similarity with the question to be answered, and then uniformly divide them into multiple buckets. It finally extracts one question from each bucket to make the extracted questions more diverse. To make the extracted questions help RoSE answer new questions as much as possible, we introduce two other attributes of uncertainty and complexity for each question. RoSE will preferentially select the questions with low uncertainty and high complexity from each bucket. We evaluate the versatility of RoSE in various complex reasoning tasks and LLMs, such as arithmetic and commonsense reasoning, and find that it can achieve excellent performance without any labeled data and pre-set unlabeled data.
%R 10.18653/v1/2024.emnlp-main.48
%U https://aclanthology.org/2024.emnlp-main.48
%U https://doi.org/10.18653/v1/2024.emnlp-main.48
%P 817-838
Markdown (Informal)
[Making Large Language Models Better Reasoners with Orchestrated Streaming Experiences](https://aclanthology.org/2024.emnlp-main.48) (Liu et al., EMNLP 2024)
ACL