@inproceedings{yan-etal-2024-consolidating,
title = "Consolidating Ranking and Relevance Predictions of Large Language Models through Post-Processing",
author = "Yan, Le and
Qin, Zhen and
Zhuang, Honglei and
Jagerman, Rolf and
Wang, Xuanhui and
Bendersky, Michael and
Oosterhuis, Harrie",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-main.25/",
doi = "10.18653/v1/2024.emnlp-main.25",
pages = "410--423",
abstract = "The powerful generative abilities of large language models (LLMs) show potential in generating relevance labels for search applications. Previous work has found that directly asking about relevancy, such as ''*How relevant is document A to query Q?*{\textquotedblright}, results in suboptimal ranking. Instead, the pairwise-ranking prompting (PRP) approach produces promising ranking performance through asking about pairwise comparisons, e.g., ''*Is document A more relevant than document B to query Q?*{\textquotedblright}. Thus, while LLMs are effective at their ranking ability, this is not reflected in their relevance label generation.In this work, we propose a post-processing method to consolidate the relevance labels generated by an LLM with its powerful ranking abilities. Our method takes both LLM generated relevance labels and pairwise preferences. The labels are then altered to satisfy the pairwise preferences of the LLM, while staying as close to the original values as possible. Our experimental results indicate that our approach effectively balances label accuracy and ranking performance. Thereby, our work shows it is possible to combine both the ranking and labeling abilities of LLMs through post-processing."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yan-etal-2024-consolidating">
<titleInfo>
<title>Consolidating Ranking and Relevance Predictions of Large Language Models through Post-Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Le</namePart>
<namePart type="family">Yan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhen</namePart>
<namePart type="family">Qin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Honglei</namePart>
<namePart type="family">Zhuang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rolf</namePart>
<namePart type="family">Jagerman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuanhui</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Bendersky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harrie</namePart>
<namePart type="family">Oosterhuis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The powerful generative abilities of large language models (LLMs) show potential in generating relevance labels for search applications. Previous work has found that directly asking about relevancy, such as “*How relevant is document A to query Q?*”, results in suboptimal ranking. Instead, the pairwise-ranking prompting (PRP) approach produces promising ranking performance through asking about pairwise comparisons, e.g., “*Is document A more relevant than document B to query Q?*”. Thus, while LLMs are effective at their ranking ability, this is not reflected in their relevance label generation. In this work, we propose a post-processing method to consolidate the relevance labels generated by an LLM with its powerful ranking abilities. Our method takes both LLM generated relevance labels and pairwise preferences. The labels are then altered to satisfy the pairwise preferences of the LLM, while staying as close to the original values as possible. Our experimental results indicate that our approach effectively balances label accuracy and ranking performance. Thereby, our work shows it is possible to combine both the ranking and labeling abilities of LLMs through post-processing.</abstract>
<identifier type="citekey">yan-etal-2024-consolidating</identifier>
<identifier type="doi">10.18653/v1/2024.emnlp-main.25</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.25/</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>410</start>
<end>423</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Consolidating Ranking and Relevance Predictions of Large Language Models through Post-Processing
%A Yan, Le
%A Qin, Zhen
%A Zhuang, Honglei
%A Jagerman, Rolf
%A Wang, Xuanhui
%A Bendersky, Michael
%A Oosterhuis, Harrie
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F yan-etal-2024-consolidating
%X The powerful generative abilities of large language models (LLMs) show potential in generating relevance labels for search applications. Previous work has found that directly asking about relevancy, such as “*How relevant is document A to query Q?*”, results in suboptimal ranking. Instead, the pairwise-ranking prompting (PRP) approach produces promising ranking performance through asking about pairwise comparisons, e.g., “*Is document A more relevant than document B to query Q?*”. Thus, while LLMs are effective at their ranking ability, this is not reflected in their relevance label generation. In this work, we propose a post-processing method to consolidate the relevance labels generated by an LLM with its powerful ranking abilities. Our method takes both LLM generated relevance labels and pairwise preferences. The labels are then altered to satisfy the pairwise preferences of the LLM, while staying as close to the original values as possible. Our experimental results indicate that our approach effectively balances label accuracy and ranking performance. Thereby, our work shows it is possible to combine both the ranking and labeling abilities of LLMs through post-processing.
%R 10.18653/v1/2024.emnlp-main.25
%U https://aclanthology.org/2024.emnlp-main.25/
%U https://doi.org/10.18653/v1/2024.emnlp-main.25
%P 410-423
Markdown (Informal)
[Consolidating Ranking and Relevance Predictions of Large Language Models through Post-Processing](https://aclanthology.org/2024.emnlp-main.25/) (Yan et al., EMNLP 2024)
ACL
Le Yan, Zhen Qin, Honglei Zhuang, Rolf Jagerman, Xuanhui Wang, Michael Bendersky, and Harrie Oosterhuis. 2024. Consolidating Ranking and Relevance Predictions of Large Language Models through Post-Processing. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pages 410–423, Miami, Florida, USA. Association for Computational Linguistics.
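
The abstract describes the core idea only at a high level: adjust the LLM's pointwise relevance labels so that they respect the LLM's pairwise preferences while staying as close as possible to the original values. The sketch below is an illustrative assumption, not the paper's actual algorithm: the win-count ordering and the isotonic-regression projection are stand-ins chosen for brevity, and all names are hypothetical.

```python
# Illustrative sketch only (NOT the paper's method): nudge LLM relevance
# labels toward the LLM's pairwise preferences while staying close to the
# original values, by projecting the labels onto an order derived from
# pairwise wins (an L2-closest fit under order constraints).
import numpy as np
from sklearn.isotonic import IsotonicRegression


def consolidate(labels: np.ndarray, pairwise_wins: np.ndarray) -> np.ndarray:
    """labels: original LLM relevance scores, shape (n,).
    pairwise_wins: hypothetical preference matrix where entry [i, j] = 1
    if the LLM prefers document i over document j, else 0."""
    # Order documents from least to most preferred by their win counts,
    # as a simple proxy for the LLM's pairwise-ranking order.
    order = np.argsort(pairwise_wins.sum(axis=1))
    # Find scores that are non-decreasing along that order and closest
    # (in squared error) to the original labels.
    iso = IsotonicRegression(increasing=True)
    adjusted_in_order = iso.fit_transform(np.arange(len(order)), labels[order])
    adjusted = np.empty_like(labels, dtype=float)
    adjusted[order] = adjusted_in_order
    return adjusted


if __name__ == "__main__":
    labels = np.array([2.0, 3.0, 3.0, 1.0])   # LLM pointwise labels
    wins = np.array([[0, 1, 0, 1],            # LLM pairwise preferences
                     [0, 0, 0, 1],
                     [1, 1, 0, 1],
                     [0, 0, 0, 0]])
    print(consolidate(labels, wins))          # labels reordered to respect wins
```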