@inproceedings{fang-etal-2024-efficiently,
title = "Efficiently Acquiring Human Feedback with {B}ayesian Deep Learning",
author = "Fang, Haishuo and
Gor, Jeet and
Simpson, Edwin",
editor = {V{\'a}zquez, Ra{\'u}l and
Celikkanat, Hande and
Ulmer, Dennis and
Tiedemann, J{\"o}rg and
Swayamdipta, Swabha and
Aziz, Wilker and
Plank, Barbara and
Baan, Joris and
de Marneffe, Marie-Catherine},
booktitle = "Proceedings of the 1st Workshop on Uncertainty-Aware NLP (UncertaiNLP 2024)",
month = mar,
year = "2024",
address = "St Julians, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.uncertainlp-1.7/",
pages = "70--80",
abstract = "Learning from human feedback can improve models for text generation or passage ranking, aligning them better to a user`s needs. Data is often collected by asking users to compare alternative outputs to a given input, which may require a large number of comparisons to learn a ranking function. The amount of comparisons needed can be reduced using Bayesian Optimisation (BO) to query the user about only the most promising candidate outputs. Previous applications of BO to text ranking relied on shallow surrogate models to learn ranking functions over candidate outputs,and were therefore unable to fine-tune rankers based on deep, pretrained language models. This paper leverages Bayesian deep learning (BDL) to adapt pretrained language models to highly specialised text ranking tasks, using BO to tune the model with a small number of pairwise preferences between candidate outputs. We apply our approach to community question answering (cQA) and extractive multi-document summarisation (MDS) with simulated noisy users, finding that our BDL approach significantly outperforms both a shallow Gaussian process model and traditional active learning with a standard deep neural network, while remaining robust to noise in the user feedback."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fang-etal-2024-efficiently">
<titleInfo>
<title>Efficiently Acquiring Human Feedback with Bayesian Deep Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Haishuo</namePart>
<namePart type="family">Fang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jeet</namePart>
<namePart type="family">Gor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Edwin</namePart>
<namePart type="family">Simpson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Uncertainty-Aware NLP (UncertaiNLP 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Raúl</namePart>
<namePart type="family">Vázquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hande</namePart>
<namePart type="family">Celikkanat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dennis</namePart>
<namePart type="family">Ulmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Swabha</namePart>
<namePart type="family">Swayamdipta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wilker</namePart>
<namePart type="family">Aziz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="family">Plank</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joris</namePart>
<namePart type="family">Baan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie-Catherine</namePart>
<namePart type="family">de Marneffe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St Julians, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Learning from human feedback can improve models for text generation or passage ranking, aligning them better to a user‘s needs. Data is often collected by asking users to compare alternative outputs to a given input, which may require a large number of comparisons to learn a ranking function. The amount of comparisons needed can be reduced using Bayesian Optimisation (BO) to query the user about only the most promising candidate outputs. Previous applications of BO to text ranking relied on shallow surrogate models to learn ranking functions over candidate outputs,and were therefore unable to fine-tune rankers based on deep, pretrained language models. This paper leverages Bayesian deep learning (BDL) to adapt pretrained language models to highly specialised text ranking tasks, using BO to tune the model with a small number of pairwise preferences between candidate outputs. We apply our approach to community question answering (cQA) and extractive multi-document summarisation (MDS) with simulated noisy users, finding that our BDL approach significantly outperforms both a shallow Gaussian process model and traditional active learning with a standard deep neural network, while remaining robust to noise in the user feedback.</abstract>
<identifier type="citekey">fang-etal-2024-efficiently</identifier>
<location>
<url>https://aclanthology.org/2024.uncertainlp-1.7/</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>70</start>
<end>80</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Efficiently Acquiring Human Feedback with Bayesian Deep Learning
%A Fang, Haishuo
%A Gor, Jeet
%A Simpson, Edwin
%Y Vázquez, Raúl
%Y Celikkanat, Hande
%Y Ulmer, Dennis
%Y Tiedemann, Jörg
%Y Swayamdipta, Swabha
%Y Aziz, Wilker
%Y Plank, Barbara
%Y Baan, Joris
%Y de Marneffe, Marie-Catherine
%S Proceedings of the 1st Workshop on Uncertainty-Aware NLP (UncertaiNLP 2024)
%D 2024
%8 March
%I Association for Computational Linguistics
%C St Julians, Malta
%F fang-etal-2024-efficiently
%X Learning from human feedback can improve models for text generation or passage ranking, aligning them better to a user's needs. Data is often collected by asking users to compare alternative outputs to a given input, which may require a large number of comparisons to learn a ranking function. The number of comparisons needed can be reduced using Bayesian Optimisation (BO) to query the user about only the most promising candidate outputs. Previous applications of BO to text ranking relied on shallow surrogate models to learn ranking functions over candidate outputs, and were therefore unable to fine-tune rankers based on deep, pretrained language models. This paper leverages Bayesian deep learning (BDL) to adapt pretrained language models to highly specialised text ranking tasks, using BO to tune the model with a small number of pairwise preferences between candidate outputs. We apply our approach to community question answering (cQA) and extractive multi-document summarisation (MDS) with simulated noisy users, finding that our BDL approach significantly outperforms both a shallow Gaussian process model and traditional active learning with a standard deep neural network, while remaining robust to noise in the user feedback.
%U https://aclanthology.org/2024.uncertainlp-1.7/
%P 70-80
Markdown (Informal):
[Efficiently Acquiring Human Feedback with Bayesian Deep Learning](https://aclanthology.org/2024.uncertainlp-1.7/) (Fang et al., UncertaiNLP 2024)