@inproceedings{trust-etal-2022-snlp,
title = "{SNLP} at {T}ext{G}raphs 2022 Shared Task: Unsupervised Natural Language Premise Selection in Mathematical Texts Using Sentence-{MPN}et",
author = "Trust, Paul and
Kadusabe, Provia and
Younis, Haseeb and
Minghim, Rosane and
Milios, Evangelos and
Zahran, Ahmed",
editor = "Ustalov, Dmitry and
Gao, Yanjun and
Panchenko, Alexander and
Valentino, Marco and
Thayaparan, Mokanarangan and
Nguyen, Thien Huu and
Penn, Gerald and
Ramesh, Arti and
Jana, Abhik",
booktitle = "Proceedings of TextGraphs-16: Graph-based Methods for Natural Language Processing",
month = oct,
year = "2022",
address = "Gyeongju, Republic of Korea",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.textgraphs-1.13",
pages = "119--123",
abstract = "This paper describes our system for the submission to the TextGraphs 2022 shared task at COLING 2022: Natural Language Premise Selection (NLPS) from mathematical texts. The task of NLPS is about selecting mathematical statements called premises in a knowledge base written in natural language and mathematical formulae that are most likely to be used to prove a particular mathematical proof. We formulated this task as an unsupervised semantic similarity task by first obtaining contextualized embeddings of both the premises and mathematical proofs using sentence transformers. We then obtained the cosine similarity between the embeddings of premises and proofs and then selected premises with the highest cosine scores as the most probable. Our system improves over the baseline system that uses bag of words models based on term frequency inverse document frequency in terms of mean average precision (MAP) by about 23.5{\%} (0.1516 versus 0.1228).",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="trust-etal-2022-snlp">
<titleInfo>
<title>SNLP at TextGraphs 2022 Shared Task: Unsupervised Natural Language Premise Selection in Mathematical Texts Using Sentence-MPNet</title>
</titleInfo>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Trust</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Provia</namePart>
<namePart type="family">Kadusabe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haseeb</namePart>
<namePart type="family">Younis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rosane</namePart>
<namePart type="family">Minghim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Evangelos</namePart>
<namePart type="family">Milios</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">Zahran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of TextGraphs-16: Graph-based Methods for Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dmitry</namePart>
<namePart type="family">Ustalov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yanjun</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Panchenko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Valentino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mokanarangan</namePart>
<namePart type="family">Thayaparan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thien</namePart>
<namePart type="given">Huu</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gerald</namePart>
<namePart type="family">Penn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arti</namePart>
<namePart type="family">Ramesh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abhik</namePart>
<namePart type="family">Jana</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gyeongju, Republic of Korea</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes our system for the submission to the TextGraphs 2022 shared task at COLING 2022: Natural Language Premise Selection (NLPS) from mathematical texts. The task of NLPS is about selecting mathematical statements called premises in a knowledge base written in natural language and mathematical formulae that are most likely to be used to prove a particular mathematical proof. We formulated this task as an unsupervised semantic similarity task by first obtaining contextualized embeddings of both the premises and mathematical proofs using sentence transformers. We then obtained the cosine similarity between the embeddings of premises and proofs and then selected premises with the highest cosine scores as the most probable. Our system improves over the baseline system that uses bag of words models based on term frequency inverse document frequency in terms of mean average precision (MAP) by about 23.5% (0.1516 versus 0.1228).</abstract>
<identifier type="citekey">trust-etal-2022-snlp</identifier>
<location>
<url>https://aclanthology.org/2022.textgraphs-1.13</url>
</location>
<part>
<date>2022-10</date>
<extent unit="page">
<start>119</start>
<end>123</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SNLP at TextGraphs 2022 Shared Task: Unsupervised Natural Language Premise Selection in Mathematical Texts Using Sentence-MPNet
%A Trust, Paul
%A Kadusabe, Provia
%A Younis, Haseeb
%A Minghim, Rosane
%A Milios, Evangelos
%A Zahran, Ahmed
%Y Ustalov, Dmitry
%Y Gao, Yanjun
%Y Panchenko, Alexander
%Y Valentino, Marco
%Y Thayaparan, Mokanarangan
%Y Nguyen, Thien Huu
%Y Penn, Gerald
%Y Ramesh, Arti
%Y Jana, Abhik
%S Proceedings of TextGraphs-16: Graph-based Methods for Natural Language Processing
%D 2022
%8 October
%I Association for Computational Linguistics
%C Gyeongju, Republic of Korea
%F trust-etal-2022-snlp
%X This paper describes our system for the submission to the TextGraphs 2022 shared task at COLING 2022: Natural Language Premise Selection (NLPS) from mathematical texts. The task of NLPS is about selecting mathematical statements called premises in a knowledge base written in natural language and mathematical formulae that are most likely to be used to prove a particular mathematical proof. We formulated this task as an unsupervised semantic similarity task by first obtaining contextualized embeddings of both the premises and mathematical proofs using sentence transformers. We then obtained the cosine similarity between the embeddings of premises and proofs and then selected premises with the highest cosine scores as the most probable. Our system improves over the baseline system that uses bag of words models based on term frequency inverse document frequency in terms of mean average precision (MAP) by about 23.5% (0.1516 versus 0.1228).
%U https://aclanthology.org/2022.textgraphs-1.13
%P 119-123
Markdown (Informal)
[SNLP at TextGraphs 2022 Shared Task: Unsupervised Natural Language Premise Selection in Mathematical Texts Using Sentence-MPNet](https://aclanthology.org/2022.textgraphs-1.13) (Trust et al., TextGraphs 2022)
ACL