@inproceedings{sugiyama-etal-2025-paraphrase,
title = "Paraphrase-based Contrastive Learning for Sentence Pair Modeling",
author = "Sugiyama, Seiji and
Kondo, Risa and
Kajiwara, Tomoyuki and
Ninomiya, Takashi",
editor = "Ebrahimi, Abteen and
Haider, Samar and
Liu, Emmy and
Haider, Sammar and
Leonor Pacheco, Maria and
Wein, Shira",
booktitle = "Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 4: Student Research Workshop)",
month = apr,
year = "2025",
address = "Albuquerque, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.naacl-srw.39/",
doi = "10.18653/v1/2025.naacl-srw.39",
pages = "400--407",
ISBN = "979-8-89176-192-6",
abstract = "To improve the performance of sentence pair modeling tasks, we propose an additional pre-training method, also known as transfer fine-tuning, for pre-trained masked language models.Pre-training for masked language modeling is not necessarily designed to bring semantically similar sentences closer together in the embedding space.Our proposed method aims to improve the performance of sentence pair modeling by applying contrastive learning to pre-trained masked language models, in which sentence embeddings of paraphrase pairs are made similar to each other.While natural language inference corpora, which are standard in previous studies on contrastive learning, are not available on a large-scale for non-English languages, our method can construct a training corpus for contrastive learning from a raw corpus and a paraphrase dictionary at a low cost.Experimental results on four sentence pair modeling tasks revealed the effectiveness of our method in both English and Japanese."
}

MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="sugiyama-etal-2025-paraphrase">
    <titleInfo>
      <title>Paraphrase-based Contrastive Learning for Sentence Pair Modeling</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Seiji</namePart>
      <namePart type="family">Sugiyama</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Risa</namePart>
      <namePart type="family">Kondo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tomoyuki</namePart>
      <namePart type="family">Kajiwara</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Takashi</namePart>
      <namePart type="family">Ninomiya</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 4: Student Research Workshop)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Abteen</namePart>
        <namePart type="family">Ebrahimi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Samar</namePart>
        <namePart type="family">Haider</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Emmy</namePart>
        <namePart type="family">Liu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sammar</namePart>
        <namePart type="family">Haider</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Leonor Pacheco</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shira</namePart>
        <namePart type="family">Wein</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Albuquerque, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-192-6</identifier>
    </relatedItem>
    <abstract>To improve the performance of sentence pair modeling tasks, we propose an additional pre-training method, also known as transfer fine-tuning, for pre-trained masked language models. Pre-training for masked language modeling is not necessarily designed to bring semantically similar sentences closer together in the embedding space. Our proposed method aims to improve the performance of sentence pair modeling by applying contrastive learning to pre-trained masked language models, in which sentence embeddings of paraphrase pairs are made similar to each other. While natural language inference corpora, which are standard in previous studies on contrastive learning, are not available on a large scale for non-English languages, our method can construct a training corpus for contrastive learning from a raw corpus and a paraphrase dictionary at low cost. Experimental results on four sentence pair modeling tasks revealed the effectiveness of our method in both English and Japanese.</abstract>
<identifier type="citekey">sugiyama-etal-2025-paraphrase</identifier>
<identifier type="doi">10.18653/v1/2025.naacl-srw.39</identifier>
<location>
<url>https://aclanthology.org/2025.naacl-srw.39/</url>
</location>
<part>
<date>2025-04</date>
<extent unit="page">
<start>400</start>
<end>407</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Paraphrase-based Contrastive Learning for Sentence Pair Modeling
%A Sugiyama, Seiji
%A Kondo, Risa
%A Kajiwara, Tomoyuki
%A Ninomiya, Takashi
%Y Ebrahimi, Abteen
%Y Haider, Samar
%Y Liu, Emmy
%Y Haider, Sammar
%Y Leonor Pacheco, Maria
%Y Wein, Shira
%S Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 4: Student Research Workshop)
%D 2025
%8 April
%I Association for Computational Linguistics
%C Albuquerque, USA
%@ 979-8-89176-192-6
%F sugiyama-etal-2025-paraphrase
%X To improve the performance of sentence pair modeling tasks, we propose an additional pre-training method, also known as transfer fine-tuning, for pre-trained masked language models. Pre-training for masked language modeling is not necessarily designed to bring semantically similar sentences closer together in the embedding space. Our proposed method aims to improve the performance of sentence pair modeling by applying contrastive learning to pre-trained masked language models, in which sentence embeddings of paraphrase pairs are made similar to each other. While natural language inference corpora, which are standard in previous studies on contrastive learning, are not available on a large scale for non-English languages, our method can construct a training corpus for contrastive learning from a raw corpus and a paraphrase dictionary at low cost. Experimental results on four sentence pair modeling tasks revealed the effectiveness of our method in both English and Japanese.
%R 10.18653/v1/2025.naacl-srw.39
%U https://aclanthology.org/2025.naacl-srw.39/
%U https://doi.org/10.18653/v1/2025.naacl-srw.39
%P 400-407

Markdown (Informal)
[Paraphrase-based Contrastive Learning for Sentence Pair Modeling](https://aclanthology.org/2025.naacl-srw.39/) (Sugiyama et al., NAACL 2025)

ACL
Seiji Sugiyama, Risa Kondo, Tomoyuki Kajiwara, and Takashi Ninomiya. 2025. Paraphrase-based Contrastive Learning for Sentence Pair Modeling. In Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 4: Student Research Workshop), pages 400–407, Albuquerque, USA. Association for Computational Linguistics.
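
As a reading aid, below is a minimal sketch of the kind of objective the abstract describes: contrastive learning on top of a pre-trained masked language model that pulls the sentence embeddings of paraphrase pairs together. This is not the authors' implementation; the mean pooling, the InfoNCE loss with in-batch negatives, the temperature of 0.05, and the `bert-base-uncased` checkpoint are all assumptions made for illustration. Only the core idea, making paraphrase-pair embeddings similar over a pre-trained masked language model, comes from the abstract.

```python
import torch
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer

# Hypothetical setup: any pre-trained masked language model could stand in here
# (the paper covers both English and Japanese; this checkpoint is an assumption).
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
encoder = AutoModel.from_pretrained(model_name)

def embed(sentences):
    """Mean-pool token states into sentence embeddings (pooling choice is an assumption)."""
    batch = tokenizer(sentences, padding=True, truncation=True, return_tensors="pt")
    hidden = encoder(**batch).last_hidden_state           # (B, T, H)
    mask = batch["attention_mask"].unsqueeze(-1).float()  # (B, T, 1)
    return (hidden * mask).sum(1) / mask.sum(1)           # (B, H)

def contrastive_loss(src_sents, para_sents, temperature=0.05):
    """InfoNCE over paraphrase pairs: each sentence should be closest to its own
    paraphrase, with the other in-batch paraphrases serving as negatives."""
    z1 = F.normalize(embed(src_sents), dim=-1)
    z2 = F.normalize(embed(para_sents), dim=-1)
    logits = z1 @ z2.T / temperature       # (B, B) cosine similarity matrix
    labels = torch.arange(len(src_sents))  # positives sit on the diagonal
    return F.cross_entropy(logits, labels)

# Toy paraphrase pairs, e.g. produced by substituting entries from a paraphrase
# dictionary into sentences drawn from a raw corpus, which is the low-cost corpus
# construction the abstract describes.
loss = contrastive_loss(
    ["the movie was fantastic", "he fixed the car quickly"],
    ["the film was great", "he repaired the car fast"],
)
loss.backward()
```

Using the other paraphrases in the batch as negatives avoids mining explicit negative pairs, which fits the abstract's emphasis on building the training corpus cheaply; whether the paper uses this particular negative-sampling scheme is not stated in the record above.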