@inproceedings{pratik-etal-2023-improving,
title = "Improving the Evaluation of {NLP} Approaches for Scientific Text Annotation with Ontology Embedding-Based Semantic Similarity Metrics",
author = "Devkota, Pratik and
Mohanty, Somya D. and
Manda, Prashanti",
editor = "Pawar, Jyoti D. and
Lalitha Devi, Sobha",
booktitle = "Proceedings of the 20th International Conference on Natural Language Processing (ICON)",
month = dec,
year = "2023",
address = "Goa University, Goa, India",
publisher = "NLP Association of India (NLPAI)",
url = "https://aclanthology.org/2023.icon-1.47",
pages = "516--522",
abstract = "Lexical Simplification is a challenging task that aims to improve the readability of text for nonnative people, people with dyslexia, and any linguistic impairments. It consists of 3 components: 1) Complex Word Identification 2) Substitute Generation 3) Substitute Ranking. Current methods use contextual information as a primary source in all three stages of the simplification pipeline. We argue that while context is an important measure, it alone is not sufficient in the process. In the complex word identification step, contextual information is inadequate, moreover, heavy feature engineering is required to use additional linguistic features. This paper presents a novel architecture for complex word identification that uses a pre-trained transformer model{'}s information flow through its hidden layers as a feature representation that implicitly encodes all the features required for identification. We portray how database methods and masked language modeling can be complementary to one another in substitute generation and ranking process that is built on the foundational pillars of Simplicity, Grammatical and Semantic correctness, and context preservation. We show that our proposed model generalizes well and outperforms the current state-of-the-art on wellknown datasets.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pratik-etal-2023-improving">
<titleInfo>
<title>Improving the Evaluation of NLP Approaches for Scientific Text Annotation with Ontology Embedding-Based Semantic Similarity Metrics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pratik</namePart>
<namePart type="family">Devkota</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Somya</namePart>
<namePart type="given">D</namePart>
<namePart type="family">Mohanty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Prashanti</namePart>
<namePart type="family">Manda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th International Conference on Natural Language Processing (ICON)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jyoti</namePart>
<namePart type="given">D</namePart>
<namePart type="family">Pawar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sobha</namePart>
<namePart type="family">Lalitha Devi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>NLP Association of India (NLPAI)</publisher>
<place>
<placeTerm type="text">Goa University, Goa, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Lexical Simplification is a challenging task that aims to improve the readability of text for nonnative people, people with dyslexia, and any linguistic impairments. It consists of 3 components: 1) Complex Word Identification 2) Substitute Generation 3) Substitute Ranking. Current methods use contextual information as a primary source in all three stages of the simplification pipeline. We argue that while context is an important measure, it alone is not sufficient in the process. In the complex word identification step, contextual information is inadequate, moreover, heavy feature engineering is required to use additional linguistic features. This paper presents a novel architecture for complex word identification that uses a pre-trained transformer model’s information flow through its hidden layers as a feature representation that implicitly encodes all the features required for identification. We portray how database methods and masked language modeling can be complementary to one another in substitute generation and ranking process that is built on the foundational pillars of Simplicity, Grammatical and Semantic correctness, and context preservation. We show that our proposed model generalizes well and outperforms the current state-of-the-art on wellknown datasets.</abstract>
<identifier type="citekey">pratik-etal-2023-improving</identifier>
<location>
<url>https://aclanthology.org/2023.icon-1.47</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>516</start>
<end>522</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Improving the Evaluation of NLP Approaches for Scientific Text Annotation with Ontology Embedding-Based Semantic Similarity Metrics
%A Devkota, Pratik
%A Mohanty, Somya D.
%A Manda, Prashanti
%Y Pawar, Jyoti D.
%Y Lalitha Devi, Sobha
%S Proceedings of the 20th International Conference on Natural Language Processing (ICON)
%D 2023
%8 December
%I NLP Association of India (NLPAI)
%C Goa University, Goa, India
%F pratik-etal-2023-improving
%X Lexical Simplification is a challenging task that aims to improve the readability of text for nonnative people, people with dyslexia, and any linguistic impairments. It consists of 3 components: 1) Complex Word Identification 2) Substitute Generation 3) Substitute Ranking. Current methods use contextual information as a primary source in all three stages of the simplification pipeline. We argue that while context is an important measure, it alone is not sufficient in the process. In the complex word identification step, contextual information is inadequate, moreover, heavy feature engineering is required to use additional linguistic features. This paper presents a novel architecture for complex word identification that uses a pre-trained transformer model’s information flow through its hidden layers as a feature representation that implicitly encodes all the features required for identification. We portray how database methods and masked language modeling can be complementary to one another in substitute generation and ranking process that is built on the foundational pillars of Simplicity, Grammatical and Semantic correctness, and context preservation. We show that our proposed model generalizes well and outperforms the current state-of-the-art on wellknown datasets.
%U https://aclanthology.org/2023.icon-1.47
%P 516-522
Markdown (Informal)
[Improving the Evaluation of NLP Approaches for Scientific Text Annotation with Ontology Embedding-Based Semantic Similarity Metrics](https://aclanthology.org/2023.icon-1.47) (Devkota et al., ICON 2023)
ACL