@inproceedings{kanakarajan-etal-2022-biosimcse,
    title     = {{BioSimCSE}: {BioMedical} Sentence Embeddings using Contrastive learning},
    author    = {Kanakarajan, Kamal raj and
                 Kundumani, Bhuvana and
                 Abraham, Abhijith and
                 Sankarasubbu, Malaikannan},
    editor    = {Lavelli, Alberto and
                 Holderness, Eben and
                 Jimeno Yepes, Antonio and
                 Minard, Anne-Lyse and
                 Pustejovsky, James and
                 Rinaldi, Fabio},
    booktitle = {Proceedings of the 13th International Workshop on Health Text Mining and Information Analysis (LOUHI)},
    month     = dec,
    year      = {2022},
    address   = {Abu Dhabi, United Arab Emirates (Hybrid)},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2022.louhi-1.10},
    doi       = {10.18653/v1/2022.louhi-1.10},
    pages     = {81--86},
    abstract  = {Sentence embeddings in the form of fixed-size vectors that capture the information in the sentence as well as the context are critical components of Natural Language Processing systems. With transformer model based sentence encoders outperforming the other sentence embedding methods in the general domain, we explore the transformer based architectures to generate dense sentence embeddings in the biomedical domain. In this work, we present BioSimCSE, where we train sentence embeddings with domain specific transformer based models with biomedical texts. We assess our model{'}s performance with zero-shot and fine-tuned settings on Semantic Textual Similarity (STS) and Recognizing Question Entailment (RQE) tasks. Our BioSimCSE model using BioLinkBERT achieves state of the art (SOTA) performance on both tasks.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kanakarajan-etal-2022-biosimcse">
<titleInfo>
<title>BioSimCSE: BioMedical Sentence Embeddings using Contrastive learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kamal</namePart>
<namePart type="given">raj</namePart>
<namePart type="family">Kanakarajan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bhuvana</namePart>
<namePart type="family">Kundumani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abhijith</namePart>
<namePart type="family">Abraham</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Malaikannan</namePart>
<namePart type="family">Sankarasubbu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 13th International Workshop on Health Text Mining and Information Analysis (LOUHI)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alberto</namePart>
<namePart type="family">Lavelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eben</namePart>
<namePart type="family">Holderness</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonio</namePart>
<namePart type="family">Jimeno Yepes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anne-Lyse</namePart>
<namePart type="family">Minard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Pustejovsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fabio</namePart>
<namePart type="family">Rinaldi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Sentence embeddings in the form of fixed-size vectors that capture the information in the sentence as well as the context are critical components of Natural Language Processing systems. With transformer model based sentence encoders outperforming the other sentence embedding methods in the general domain, we explore the transformer based architectures to generate dense sentence embeddings in the biomedical domain. In this work, we present BioSimCSE, where we train sentence embeddings with domain specific transformer based models with biomedical texts. We assess our model’s performance with zero-shot and fine-tuned settings on Semantic Textual Similarity (STS) and Recognizing Question Entailment (RQE) tasks. Our BioSimCSE model using BioLinkBERT achieves state of the art (SOTA) performance on both tasks.</abstract>
<identifier type="citekey">kanakarajan-etal-2022-biosimcse</identifier>
<identifier type="doi">10.18653/v1/2022.louhi-1.10</identifier>
<location>
<url>https://aclanthology.org/2022.louhi-1.10</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>81</start>
<end>86</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BioSimCSE: BioMedical Sentence Embeddings using Contrastive learning
%A Kanakarajan, Kamal raj
%A Kundumani, Bhuvana
%A Abraham, Abhijith
%A Sankarasubbu, Malaikannan
%Y Lavelli, Alberto
%Y Holderness, Eben
%Y Jimeno Yepes, Antonio
%Y Minard, Anne-Lyse
%Y Pustejovsky, James
%Y Rinaldi, Fabio
%S Proceedings of the 13th International Workshop on Health Text Mining and Information Analysis (LOUHI)
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates (Hybrid)
%F kanakarajan-etal-2022-biosimcse
%X Sentence embeddings in the form of fixed-size vectors that capture the information in the sentence as well as the context are critical components of Natural Language Processing systems. With transformer model based sentence encoders outperforming the other sentence embedding methods in the general domain, we explore the transformer based architectures to generate dense sentence embeddings in the biomedical domain. In this work, we present BioSimCSE, where we train sentence embeddings with domain specific transformer based models with biomedical texts. We assess our model’s performance with zero-shot and fine-tuned settings on Semantic Textual Similarity (STS) and Recognizing Question Entailment (RQE) tasks. Our BioSimCSE model using BioLinkBERT achieves state of the art (SOTA) performance on both tasks.
%R 10.18653/v1/2022.louhi-1.10
%U https://aclanthology.org/2022.louhi-1.10
%U https://doi.org/10.18653/v1/2022.louhi-1.10
%P 81-86
Markdown (Informal)
[BioSimCSE: BioMedical Sentence Embeddings using Contrastive learning](https://aclanthology.org/2022.louhi-1.10) (Kanakarajan et al., Louhi 2022)
ACL
- Kamal raj Kanakarajan, Bhuvana Kundumani, Abhijith Abraham, and Malaikannan Sankarasubbu. 2022. BioSimCSE: BioMedical Sentence Embeddings using Contrastive learning. In Proceedings of the 13th International Workshop on Health Text Mining and Information Analysis (LOUHI), pages 81–86, Abu Dhabi, United Arab Emirates (Hybrid). Association for Computational Linguistics.