@inproceedings{parisot-zavrel-2022-multi,
title = "Multi-objective Representation Learning for Scientific Document Retrieval",
author = "Parisot, Mathias and
Zavrel, Jakub",
booktitle = "Proceedings of the Third Workshop on Scholarly Document Processing",
month = oct,
year = "2022",
address = "Gyeongju, Republic of Korea",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.sdp-1.9",
pages = "80--88",
abstract = "Existing dense retrieval models for scientific documents have been optimized for either retrieval by short queries, or for document similarity, but usually not for both. In this paper, we explore the space of combining multiple objectives to achieve a single representation model that presents a good balance between both modes of dense retrieval, combining the relevance judgements from MS MARCO with the citation similarity of SPECTER, and the self-supervised objective of independent cropping. We also consider the addition of training data from document co-citation in a sentence context and domain-specific synthetic data. We show that combining multiple objectives yields models that generalize well across different benchmark tasks, improving up to 73{\%} over models trained on a single objective.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="parisot-zavrel-2022-multi">
<titleInfo>
<title>Multi-objective Representation Learning for Scientific Document Retrieval</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mathias</namePart>
<namePart type="family">Parisot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jakub</namePart>
<namePart type="family">Zavrel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on Scholarly Document Processing</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gyeongju, Republic of Korea</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Existing dense retrieval models for scientific documents have been optimized for either retrieval by short queries, or for document similarity, but usually not for both. In this paper, we explore the space of combining multiple objectives to achieve a single representation model that presents a good balance between both modes of dense retrieval, combining the relevance judgements from MS MARCO with the citation similarity of SPECTER, and the self-supervised objective of independent cropping. We also consider the addition of training data from document co-citation in a sentence context and domain-specific synthetic data. We show that combining multiple objectives yields models that generalize well across different benchmark tasks, improving up to 73% over models trained on a single objective.</abstract>
<identifier type="citekey">parisot-zavrel-2022-multi</identifier>
<location>
<url>https://aclanthology.org/2022.sdp-1.9</url>
</location>
<part>
<date>2022-10</date>
<extent unit="page">
<start>80</start>
<end>88</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Multi-objective Representation Learning for Scientific Document Retrieval
%A Parisot, Mathias
%A Zavrel, Jakub
%S Proceedings of the Third Workshop on Scholarly Document Processing
%D 2022
%8 October
%I Association for Computational Linguistics
%C Gyeongju, Republic of Korea
%F parisot-zavrel-2022-multi
%X Existing dense retrieval models for scientific documents have been optimized for either retrieval by short queries, or for document similarity, but usually not for both. In this paper, we explore the space of combining multiple objectives to achieve a single representation model that presents a good balance between both modes of dense retrieval, combining the relevance judgements from MS MARCO with the citation similarity of SPECTER, and the self-supervised objective of independent cropping. We also consider the addition of training data from document co-citation in a sentence context and domain-specific synthetic data. We show that combining multiple objectives yields models that generalize well across different benchmark tasks, improving up to 73% over models trained on a single objective.
%U https://aclanthology.org/2022.sdp-1.9
%P 80-88
Markdown (Informal)
[Multi-objective Representation Learning for Scientific Document Retrieval](https://aclanthology.org/2022.sdp-1.9) (Parisot & Zavrel, sdp 2022)
ACL