% Workshop paper record. The url and doi fields point at the canonical landing page.
% address holds the city given alongside the proceedings in this record.
@inproceedings{ojha-etal-2025-somd,
  title     = {{SOMD} 2025: Fine-tuning {ModernBERT} for In- and Out-of-Distribution {NER} and Relation Extraction of Software Mentions in Scientific Texts},
  author    = {Ojha, Vaghawan and
               Shakya, Projan and
               Ghimire, Kristina and
               Bataju, Kashish and
               Mandal, Ashwini and
               Gyawali, Sadikshya and
               Dahal, Manish and
               Awale, Manish and
               Adhikari, Shital and
               Rijal, Sanjay},
  editor    = {Ghosal, Tirthankar and
               Mayr, Philipp and
               Singh, Amanpreet and
               Naik, Aakanksha and
               Rehm, Georg and
               Freitag, Dayne and
               Li, Dan and
               Schimmler, Sonja and
               De Waard, Anita},
  booktitle = {Proceedings of the Fifth Workshop on Scholarly Document Processing (SDP 2025)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.sdp-1.15/},
  doi       = {10.18653/v1/2025.sdp-1.15},
  pages     = {154--163},
  isbn      = {979-8-89176-265-7},
  abstract  = {Software mentions are ubiquitous yet remain irregularly referenced among scientific texts. In this paper, we utilized the dataset and evaluation criteria defined by Software Mention Detection (SOMD 2025) competition to solve the problem of Named Entity Recognition (NER) and Relation Extraction (RE) in input sentences from scientific texts. During the competition, we achieved a leading F1 SOMD score of 0.89 in Phase I by first fine-tuning ModernBERT for NER, and then using the extracted entity pairs for RE. Additionally, we trained a model that jointly optimizes entity and relation losses, leading to an improvement in F1 SOMD score to 0.92. Retraining the same model on an augmented dataset, we achieved the second best F1 SOMD score of 0.55 in Phase II. In the Open Submission phase, we experimented with adaptive fine-tuning, achieving an F1 SOMD score of 0.6, with the best macro average for NER being 0.69. Our work shows the efficiency of fine-tuning a niche task like software mention detection despite having limited data and the promise of adaptive fine-tuning on Out of Distribution (OOD) dataset.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ojha-etal-2025-somd">
<titleInfo>
<title>SOMD 2025: Fine-tuning ModernBERT for In- and Out-of-Distribution NER and Relation Extraction of Software Mentions in Scientific Texts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vaghawan</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Projan</namePart>
<namePart type="family">Shakya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kristina</namePart>
<namePart type="family">Ghimire</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kashish</namePart>
<namePart type="family">Bataju</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ashwini</namePart>
<namePart type="family">Mandal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sadikshya</namePart>
<namePart type="family">Gyawali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manish</namePart>
<namePart type="family">Dahal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manish</namePart>
<namePart type="family">Awale</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shital</namePart>
<namePart type="family">Adhikari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sanjay</namePart>
<namePart type="family">Rijal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Workshop on Scholarly Document Processing (SDP 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tirthankar</namePart>
<namePart type="family">Ghosal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Mayr</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amanpreet</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aakanksha</namePart>
<namePart type="family">Naik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Georg</namePart>
<namePart type="family">Rehm</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dayne</namePart>
<namePart type="family">Freitag</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sonja</namePart>
<namePart type="family">Schimmler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anita</namePart>
<namePart type="family">De Waard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-265-7</identifier>
</relatedItem>
<abstract>Software mentions are ubiquitous yet remain irregularly referenced among scientific texts. In this paper, we utilized the dataset and evaluation criteria defined by Software Mention Detection (SOMD 2025) competition to solve the problem of Named Entity Recognition (NER) and Relation Extraction (RE) in input sentences from scientific texts. During the competition, we achieved a leading F1 SOMD score of 0.89 in Phase I by first fine-tuning ModernBERT for NER, and then using the extracted entity pairs for RE. Additionally, we trained a model that jointly optimizes entity and relation losses, leading to an improvement in F1 SOMD score to 0.92. Retraining the same model on an augmented dataset, we achieved the second best F1 SOMD score of 0.55 in Phase II. In the Open Submission phase, we experimented with adaptive fine-tuning, achieving an F1 SOMD score of 0.6, with the best macro average for NER being 0.69. Our work shows the efficiency of fine-tuning a niche task like software mention detection despite having limited data and the promise of adaptive fine-tuning on Out of Distribution (OOD) dataset.</abstract>
<identifier type="citekey">ojha-etal-2025-somd</identifier>
<identifier type="doi">10.18653/v1/2025.sdp-1.15</identifier>
<location>
<url>https://aclanthology.org/2025.sdp-1.15/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>154</start>
<end>163</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SOMD 2025: Fine-tuning ModernBERT for In- and Out-of-Distribution NER and Relation Extraction of Software Mentions in Scientific Texts
%A Ojha, Vaghawan
%A Shakya, Projan
%A Ghimire, Kristina
%A Bataju, Kashish
%A Mandal, Ashwini
%A Gyawali, Sadikshya
%A Dahal, Manish
%A Awale, Manish
%A Adhikari, Shital
%A Rijal, Sanjay
%Y Ghosal, Tirthankar
%Y Mayr, Philipp
%Y Singh, Amanpreet
%Y Naik, Aakanksha
%Y Rehm, Georg
%Y Freitag, Dayne
%Y Li, Dan
%Y Schimmler, Sonja
%Y De Waard, Anita
%S Proceedings of the Fifth Workshop on Scholarly Document Processing (SDP 2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-265-7
%F ojha-etal-2025-somd
%X Software mentions are ubiquitous yet remain irregularly referenced among scientific texts. In this paper, we utilized the dataset and evaluation criteria defined by Software Mention Detection (SOMD 2025) competition to solve the problem of Named Entity Recognition (NER) and Relation Extraction (RE) in input sentences from scientific texts. During the competition, we achieved a leading F1 SOMD score of 0.89 in Phase I by first fine-tuning ModernBERT for NER, and then using the extracted entity pairs for RE. Additionally, we trained a model that jointly optimizes entity and relation losses, leading to an improvement in F1 SOMD score to 0.92. Retraining the same model on an augmented dataset, we achieved the second best F1 SOMD score of 0.55 in Phase II. In the Open Submission phase, we experimented with adaptive fine-tuning, achieving an F1 SOMD score of 0.6, with the best macro average for NER being 0.69. Our work shows the efficiency of fine-tuning a niche task like software mention detection despite having limited data and the promise of adaptive fine-tuning on Out of Distribution (OOD) dataset.
%R 10.18653/v1/2025.sdp-1.15
%U https://aclanthology.org/2025.sdp-1.15/
%U https://doi.org/10.18653/v1/2025.sdp-1.15
%P 154-163
Markdown (Informal)
[SOMD 2025: Fine-tuning ModernBERT for In- and Out-of-Distribution NER and Relation Extraction of Software Mentions in Scientific Texts](https://aclanthology.org/2025.sdp-1.15/) (Ojha et al., SDP 2025)
ACL
- Vaghawan Ojha, Projan Shakya, Kristina Ghimire, Kashish Bataju, Ashwini Mandal, Sadikshya Gyawali, Manish Dahal, Manish Awale, Shital Adhikari, and Sanjay Rijal. 2025. SOMD 2025: Fine-tuning ModernBERT for In- and Out-of-Distribution NER and Relation Extraction of Software Mentions in Scientific Texts. In Proceedings of the Fifth Workshop on Scholarly Document Processing (SDP 2025), pages 154–163, Vienna, Austria. Association for Computational Linguistics.