BibTeX
@inproceedings{fu-etal-2024-learning,
title = "Learning to Paraphrase for Alignment with {LLM} Preference",
author = "Fu, Junbo and
Zhao, Guoshuai and
Deng, Yimin and
Mi, Yunqi and
Qian, Xueming",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.134",
pages = "2394--2407",
abstract = "Large Language Models (LLMs) exhibit the issue of paraphrase divergence: when a question is phrased in a slightly different but semantically similar way, an LLM may output an incorrect response despite being able to answer the original question correctly. Previous research has treated this as a problem of the model{'}s robustness to question paraphrases and proposed a retraining method to address it. However, retraining struggles to meet the computational cost, privacy, and security demands of LLMs. In this paper, we instead regard the issue as a problem of alignment with model preferences and propose PEARL (Preference-drivEn pAraphRase Learning), a black-box method that enhances model performance by paraphrasing questions into expressions the model prefers. We validate PEARL across six datasets spanning three tasks: open-domain QA, commonsense reasoning, and math word problems. Extensive experiments demonstrate not only strong performance but also the composability, transferability, and broader potential of PEARL, shedding new light on black-box tuning of LLMs.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fu-etal-2024-learning">
<titleInfo>
<title>Learning to Paraphrase for Alignment with LLM Preference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Junbo</namePart>
<namePart type="family">Fu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guoshuai</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yimin</namePart>
<namePart type="family">Deng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yunqi</namePart>
<namePart type="family">Mi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xueming</namePart>
<namePart type="family">Qian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large Language Models (LLMs) exhibit the issue of paraphrase divergence: when a question is phrased in a slightly different but semantically similar way, an LLM may output an incorrect response despite being able to answer the original question correctly. Previous research has treated this as a problem of the model’s robustness to question paraphrases and proposed a retraining method to address it. However, retraining struggles to meet the computational cost, privacy, and security demands of LLMs. In this paper, we instead regard the issue as a problem of alignment with model preferences and propose PEARL (Preference-drivEn pAraphRase Learning), a black-box method that enhances model performance by paraphrasing questions into expressions the model prefers. We validate PEARL across six datasets spanning three tasks: open-domain QA, commonsense reasoning, and math word problems. Extensive experiments demonstrate not only strong performance but also the composability, transferability, and broader potential of PEARL, shedding new light on black-box tuning of LLMs.</abstract>
<identifier type="citekey">fu-etal-2024-learning</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.134</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>2394</start>
<end>2407</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Learning to Paraphrase for Alignment with LLM Preference
%A Fu, Junbo
%A Zhao, Guoshuai
%A Deng, Yimin
%A Mi, Yunqi
%A Qian, Xueming
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F fu-etal-2024-learning
%X Large Language Models (LLMs) exhibit the issue of paraphrase divergence: when a question is phrased in a slightly different but semantically similar way, an LLM may output an incorrect response despite being able to answer the original question correctly. Previous research has treated this as a problem of the model’s robustness to question paraphrases and proposed a retraining method to address it. However, retraining struggles to meet the computational cost, privacy, and security demands of LLMs. In this paper, we instead regard the issue as a problem of alignment with model preferences and propose PEARL (Preference-drivEn pAraphRase Learning), a black-box method that enhances model performance by paraphrasing questions into expressions the model prefers. We validate PEARL across six datasets spanning three tasks: open-domain QA, commonsense reasoning, and math word problems. Extensive experiments demonstrate not only strong performance but also the composability, transferability, and broader potential of PEARL, shedding new light on black-box tuning of LLMs.
%U https://aclanthology.org/2024.findings-emnlp.134
%P 2394-2407
Markdown (Informal)
[Learning to Paraphrase for Alignment with LLM Preference](https://aclanthology.org/2024.findings-emnlp.134) (Fu et al., Findings 2024)
ACL
Junbo Fu, Guoshuai Zhao, Yimin Deng, Yunqi Mi, and Xueming Qian. 2024. Learning to Paraphrase for Alignment with LLM Preference. In Findings of the Association for Computational Linguistics: EMNLP 2024, pages 2394–2407, Miami, Florida, USA. Association for Computational Linguistics.
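Note: the abstract describes PEARL only at a high level — the target model is treated as a black box, and questions are rewritten into phrasings the model answers correctly. As an illustration only, not the authors' implementation, the sketch below shows one way such preference data could be gathered with purely black-box access; `make_paraphrases`, `query_llm`, and `is_correct` are hypothetical stand-ins for a paraphrase generator, a generation API, and a task-specific answer checker.

```python
# Minimal illustrative sketch of black-box preference-data collection.
# NOT the PEARL implementation from the paper; all callables are
# hypothetical stand-ins supplied by the user.

from typing import Callable, List, Tuple


def collect_preference_pairs(
    questions: List[str],
    answers: List[str],
    make_paraphrases: Callable[[str], List[str]],  # candidate rewrites of a question
    query_llm: Callable[[str], str],               # black-box LLM call
    is_correct: Callable[[str, str], bool],        # model output vs. gold answer
) -> List[Tuple[str, str, str]]:
    """For each question, label which paraphrases the target model 'prefers'
    (i.e., answers correctly) versus not, yielding (question, preferred,
    dispreferred) triples that could supervise a paraphrase model."""
    pairs = []
    for question, gold in zip(questions, answers):
        candidates = make_paraphrases(question)
        preferred = [c for c in candidates if is_correct(query_llm(c), gold)]
        dispreferred = [c for c in candidates if not is_correct(query_llm(c), gold)]
        for p in preferred:
            for d in dispreferred:
                pairs.append((question, p, d))
    return pairs
```

Triples pairing a preferred with a dispreferred rewrite of the same question are exactly the kind of signal a preference-trained paraphraser could learn from, which is consistent with the abstract's framing of the problem as alignment with model preferences.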