% Entry for the CaresAI SemEval-2024 Task 2 paper. Braces in title/booktitle protect the
% mixed-case names CaresAI and SemEval from being lowercased by sentence-casing styles.
@inproceedings{abdel-salam-etal-2024-caresai,
  title     = {{CaresAI} at {SemEval}-2024 Task 2: Improving Natural Language Inference in Clinical Trial Data using Model Ensemble and Data Explanation},
  author    = {Abdel-salam, Reem and
               Adewunmi, Mary and
               Akinwale, Mercy},
  editor    = {Ojha, Atul Kr. and
               Do{\u{g}}ru{\"o}z, A. Seza and
               Tayyar Madabushi, Harish and
               Da San Martino, Giovanni and
               Rosenthal, Sara and
               Ros{\'a}, Aiala},
  booktitle = {Proceedings of the 18th International Workshop on Semantic Evaluation ({SemEval}-2024)},
  month     = jun,
  year      = {2024},
  address   = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.semeval-1.266},
  doi       = {10.18653/v1/2024.semeval-1.266},
  pages     = {1905--1911},
  abstract  = {Large language models (LLMs) have demonstrated state-of-the-art performance across multiple domains in various natural language tasks. Entailment tasks, however, are more difficult to achieve with a high-performance model. The task is to use safe natural language models to conclude biomedical clinical trial reports (CTRs). The Natural Language Inference for Clinical Trial Data (NLI4CT) task aims to define a given entailment and hypothesis based on CTRs. This paper aims to address the challenges of medical abbreviations and numerical data that can be logically inferred from one another due to acronyms, using different data pre-processing techniques to explain such data. This paper presents a model for NLI4CT SemEval 2024 task 2 that trains the data with DeBERTa, BioLink, BERT, GPT2, BioGPT, and Clinical BERT using the best training approaches, such as fine-tuning, prompt tuning, and contrastive learning. Furthermore, to validate these models, different experiments have been carried out. Our best system is built on an ensemble of different models with different training settings, which achieves an F1 score of 0.77, a faithfulness score of 0.76, and a consistency score of 0.75 and secures the sixth rank in the official leaderboard. In conclusion, this paper has addressed challenges in medical text analysis by exploring various NLP techniques, evaluating multiple advanced natural language models (NLM) models and achieving good results with the ensemble model. Additionally, this project has contributed to the advancement of safe and effective NLMs for analysing complex medical data in CTRs.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for the CaresAI SemEval-2024 Task 2 paper: the three authors come first,
     followed by the host proceedings (relatedItem type="host") with its six editors. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="abdel-salam-etal-2024-caresai">
    <titleInfo>
      <title>CaresAI at SemEval-2024 Task 2: Improving Natural Language Inference in Clinical Trial Data using Model Ensemble and Data Explanation</title>
    </titleInfo>
    <!-- Authors of the paper, in byline order. -->
    <name type="personal">
      <namePart type="given">Reem</namePart>
      <namePart type="family">Abdel-salam</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mary</namePart>
      <namePart type="family">Adewunmi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mercy</namePart>
      <namePart type="family">Akinwale</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Atul</namePart>
        <namePart type="given">Kr.</namePart>
        <namePart type="family">Ojha</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <!-- Initial keeps its period, matching "Kr." above and the "A. Seza" form in the other export formats. -->
        <namePart type="given">A.</namePart>
        <namePart type="given">Seza</namePart>
        <namePart type="family">Doğruöz</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Harish</namePart>
        <namePart type="family">Tayyar Madabushi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Giovanni</namePart>
        <namePart type="family">Da San Martino</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sara</namePart>
        <namePart type="family">Rosenthal</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Aiala</namePart>
        <namePart type="family">Rosá</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Mexico City, Mexico</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Large language models (LLMs) have demonstrated state-of-the-art performance across multiple domains in various natural language tasks. Entailment tasks, however, are more difficult to achieve with a high-performance model. The task is to use safe natural language models to conclude biomedical clinical trial reports (CTRs). The Natural Language Inference for Clinical Trial Data (NLI4CT) task aims to define a given entailment and hypothesis based on CTRs. This paper aims to address the challenges of medical abbreviations and numerical data that can be logically inferred from one another due to acronyms, using different data pre-processing techniques to explain such data. This paper presents a model for NLI4CT SemEval 2024 task 2 that trains the data with DeBERTa, BioLink, BERT, GPT2, BioGPT, and Clinical BERT using the best training approaches, such as fine-tuning, prompt tuning, and contrastive learning. Furthermore, to validate these models, different experiments have been carried out. Our best system is built on an ensemble of different models with different training settings, which achieves an F1 score of 0.77, a faithfulness score of 0.76, and a consistency score of 0.75 and secures the sixth rank in the official leaderboard. In conclusion, this paper has addressed challenges in medical text analysis by exploring various NLP techniques, evaluating multiple advanced natural language models (NLM) models and achieving good results with the ensemble model. Additionally, this project has contributed to the advancement of safe and effective NLMs for analysing complex medical data in CTRs.</abstract>
    <identifier type="citekey">abdel-salam-etal-2024-caresai</identifier>
    <identifier type="doi">10.18653/v1/2024.semeval-1.266</identifier>
    <location>
      <url>https://aclanthology.org/2024.semeval-1.266</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2024-06</date>
      <extent unit="page">
        <start>1905</start>
        <end>1911</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T CaresAI at SemEval-2024 Task 2: Improving Natural Language Inference in Clinical Trial Data using Model Ensemble and Data Explanation
%A Abdel-salam, Reem
%A Adewunmi, Mary
%A Akinwale, Mercy
%Y Ojha, Atul Kr.
%Y Doğruöz, A. Seza
%Y Tayyar Madabushi, Harish
%Y Da San Martino, Giovanni
%Y Rosenthal, Sara
%Y Rosá, Aiala
%S Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F abdel-salam-etal-2024-caresai
%X Large language models (LLMs) have demonstrated state-of-the-art performance across multiple domains in various natural language tasks. Entailment tasks, however, are more difficult to achieve with a high-performance model. The task is to use safe natural language models to conclude biomedical clinical trial reports (CTRs). The Natural Language Inference for Clinical Trial Data (NLI4CT) task aims to define a given entailment and hypothesis based on CTRs. This paper aims to address the challenges of medical abbreviations and numerical data that can be logically inferred from one another due to acronyms, using different data pre-processing techniques to explain such data. This paper presents a model for NLI4CT SemEval 2024 task 2 that trains the data with DeBERTa, BioLink, BERT, GPT2, BioGPT, and Clinical BERT using the best training approaches, such as fine-tuning, prompt tuning, and contrastive learning. Furthermore, to validate these models, different experiments have been carried out. Our best system is built on an ensemble of different models with different training settings, which achieves an F1 score of 0.77, a faithfulness score of 0.76, and a consistency score of 0.75 and secures the sixth rank in the official leaderboard. In conclusion, this paper has addressed challenges in medical text analysis by exploring various NLP techniques, evaluating multiple advanced natural language models (NLM) models and achieving good results with the ensemble model. Additionally, this project has contributed to the advancement of safe and effective NLMs for analysing complex medical data in CTRs.
%R 10.18653/v1/2024.semeval-1.266
%U https://aclanthology.org/2024.semeval-1.266
%U https://doi.org/10.18653/v1/2024.semeval-1.266
%P 1905-1911
Markdown (Informal)
[CaresAI at SemEval-2024 Task 2: Improving Natural Language Inference in Clinical Trial Data using Model Ensemble and Data Explanation](https://aclanthology.org/2024.semeval-1.266) (Abdel-salam et al., SemEval 2024)
ACL