@inproceedings{pankaj-gautam-2022-augmented,
title = "Augmented Bio-{SBERT}: Improving Performance for Pairwise Sentence Tasks in Bio-medical Domain",
author = "Pankaj, Sonam and
Gautam, Amit",
booktitle = "Proceedings of the Fifth Workshop on Technologies for Machine Translation of Low-Resource Languages (LoResMT 2022)",
month = oct,
year = "2022",
address = "Gyeongju, Republic of Korea",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.loresmt-1.6",
pages = "43--47",
abstract = "One of the modern challenges in AI is the access to high-quality and annotated data, especially in NLP; that is why augmentation is gaining importance. In computer vision, where image data augmentation is standard, text data augmentation in NLP is complex due to the high complexity of language. Moreover, we have seen the advantages of augmentation where there are fewer data available, which can significantly improve the model{'}s accuracy and performance. We have implemented Augmentation in Pairwise sentence scoring in the biomedical domain. By experimenting with our approach to downstream tasks on biomedical data, we have looked into the solution to improve Bi-encoders{'} sentence transformer performance using an augmented dataset generated by cross-encoders fine-tuned on Biosses and MedNLI on the pre-trained Bio-BERT model. It has significantly improved the results with respect to the model only trained on Gold data for the respective tasks.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pankaj-gautam-2022-augmented">
<titleInfo>
<title>Augmented Bio-SBERT: Improving Performance for Pairwise Sentence Tasks in Bio-medical Domain</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sonam</namePart>
<namePart type="family">Pankaj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amit</namePart>
<namePart type="family">Gautam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Workshop on Technologies for Machine Translation of Low-Resource Languages (LoResMT 2022)</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gyeongju, Republic of Korea</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>One of the modern challenges in AI is the access to high-quality and annotated data, especially in NLP; that is why augmentation is gaining importance. In computer vision, where image data augmentation is standard, text data augmentation in NLP is complex due to the high complexity of language. Moreover, we have seen the advantages of augmentation where there are fewer data available, which can significantly improve the model’s accuracy and performance. We have implemented Augmentation in Pairwise sentence scoring in the biomedical domain. By experimenting with our approach to downstream tasks on biomedical data, we have looked into the solution to improve Bi-encoders’ sentence transformer performance using an augmented dataset generated by cross-encoders fine-tuned on Biosses and MedNLI on the pre-trained Bio-BERT model. It has significantly improved the results with respect to the model only trained on Gold data for the respective tasks.</abstract>
<identifier type="citekey">pankaj-gautam-2022-augmented</identifier>
<location>
<url>https://aclanthology.org/2022.loresmt-1.6</url>
</location>
<part>
<date>2022-10</date>
<extent unit="page">
<start>43</start>
<end>47</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Augmented Bio-SBERT: Improving Performance for Pairwise Sentence Tasks in Bio-medical Domain
%A Pankaj, Sonam
%A Gautam, Amit
%S Proceedings of the Fifth Workshop on Technologies for Machine Translation of Low-Resource Languages (LoResMT 2022)
%D 2022
%8 October
%I Association for Computational Linguistics
%C Gyeongju, Republic of Korea
%F pankaj-gautam-2022-augmented
%X One of the modern challenges in AI is the access to high-quality and annotated data, especially in NLP; that is why augmentation is gaining importance. In computer vision, where image data augmentation is standard, text data augmentation in NLP is complex due to the high complexity of language. Moreover, we have seen the advantages of augmentation where there are fewer data available, which can significantly improve the model’s accuracy and performance. We have implemented Augmentation in Pairwise sentence scoring in the biomedical domain. By experimenting with our approach to downstream tasks on biomedical data, we have looked into the solution to improve Bi-encoders’ sentence transformer performance using an augmented dataset generated by cross-encoders fine-tuned on Biosses and MedNLI on the pre-trained Bio-BERT model. It has significantly improved the results with respect to the model only trained on Gold data for the respective tasks.
%U https://aclanthology.org/2022.loresmt-1.6
%P 43-47
Markdown (Informal)
[Augmented Bio-SBERT: Improving Performance for Pairwise Sentence Tasks in Bio-medical Domain](https://aclanthology.org/2022.loresmt-1.6) (Pankaj & Gautam, LoResMT 2022)
ACL