@inproceedings{thirukovalluru-etal-2024-sumcse,
title = "{S}um{CSE}: Summary as a transformation for Contrastive Learning",
author = "Thirukovalluru, Raghuveer and
Wang, Xiaolan and
Chen, Jun and
Li, Shuyang and
Lei, Jie and
Jin, Rong and
Dhingra, Bhuwan",
editor = "Duh, Kevin and
Gomez, Helena and
Bethard, Steven",
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2024",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-naacl.227",
doi = "10.18653/v1/2024.findings-naacl.227",
pages = "3577--3588",
abstract = "Sentence embedding models are typically trained using contrastive learning (CL), either using human annotations directly or by repurposing other annotated datasets. In this work, we explore the recently introduced paradigm of generating CL data using generative language models (LM). In CL for computer vision (CV), compositional transformations (series of operations applied over an image. e.g. cropping + color distortion) which modify the input/image to retain minimal information were shown to be very effective. We show that composition of a {`}Summary{'} transformation with diverse paraphrasing/contradicting transformations accomplishes the same and works very well in CL for sentence embeddings. Our final generated dataset (using Vicuna-13B) significantly outperforms the previous best unsupervised method (using ChatGPT) by 1.8 points, and SimCSE, a strong supervised baseline by 0.3 points on the semantic text similarity (STS) benchmark.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="thirukovalluru-etal-2024-sumcse">
<titleInfo>
<title>SumCSE: Summary as a transformation for Contrastive Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Raghuveer</namePart>
<namePart type="family">Thirukovalluru</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaolan</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jun</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shuyang</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jie</namePart>
<namePart type="family">Lei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rong</namePart>
<namePart type="family">Jin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bhuwan</namePart>
<namePart type="family">Dhingra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: NAACL 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Duh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helena</namePart>
<namePart type="family">Gomez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Sentence embedding models are typically trained using contrastive learning (CL), either using human annotations directly or by repurposing other annotated datasets. In this work, we explore the recently introduced paradigm of generating CL data using generative language models (LM). In CL for computer vision (CV), compositional transformations (series of operations applied over an image. e.g. cropping + color distortion) which modify the input/image to retain minimal information were shown to be very effective. We show that composition of a ‘Summary’ transformation with diverse paraphrasing/contradicting transformations accomplishes the same and works very well in CL for sentence embeddings. Our final generated dataset (using Vicuna-13B) significantly outperforms the previous best unsupervised method (using ChatGPT) by 1.8 points, and SimCSE, a strong supervised baseline by 0.3 points on the semantic text similarity (STS) benchmark.</abstract>
<identifier type="citekey">thirukovalluru-etal-2024-sumcse</identifier>
<identifier type="doi">10.18653/v1/2024.findings-naacl.227</identifier>
<location>
<url>https://aclanthology.org/2024.findings-naacl.227</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>3577</start>
<end>3588</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SumCSE: Summary as a transformation for Contrastive Learning
%A Thirukovalluru, Raghuveer
%A Wang, Xiaolan
%A Chen, Jun
%A Li, Shuyang
%A Lei, Jie
%A Jin, Rong
%A Dhingra, Bhuwan
%Y Duh, Kevin
%Y Gomez, Helena
%Y Bethard, Steven
%S Findings of the Association for Computational Linguistics: NAACL 2024
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F thirukovalluru-etal-2024-sumcse
%X Sentence embedding models are typically trained using contrastive learning (CL), either using human annotations directly or by repurposing other annotated datasets. In this work, we explore the recently introduced paradigm of generating CL data using generative language models (LM). In CL for computer vision (CV), compositional transformations (series of operations applied over an image. e.g. cropping + color distortion) which modify the input/image to retain minimal information were shown to be very effective. We show that composition of a ‘Summary’ transformation with diverse paraphrasing/contradicting transformations accomplishes the same and works very well in CL for sentence embeddings. Our final generated dataset (using Vicuna-13B) significantly outperforms the previous best unsupervised method (using ChatGPT) by 1.8 points, and SimCSE, a strong supervised baseline by 0.3 points on the semantic text similarity (STS) benchmark.
%R 10.18653/v1/2024.findings-naacl.227
%U https://aclanthology.org/2024.findings-naacl.227
%U https://doi.org/10.18653/v1/2024.findings-naacl.227
%P 3577-3588
Markdown (Informal)
[SumCSE: Summary as a transformation for Contrastive Learning](https://aclanthology.org/2024.findings-naacl.227) (Thirukovalluru et al., Findings 2024)
ACL
- Raghuveer Thirukovalluru, Xiaolan Wang, Jun Chen, Shuyang Li, Jie Lei, Rong Jin, and Bhuwan Dhingra. 2024. SumCSE: Summary as a transformation for Contrastive Learning. In Findings of the Association for Computational Linguistics: NAACL 2024, pages 3577–3588, Mexico City, Mexico. Association for Computational Linguistics.