@inproceedings{jorgensen-2024-pear,
title = "{PEAR} at {S}em{E}val-2024 Task 1: Pair Encoding with Augmented Re-sampling for Semantic Textual Relatedness",
author = "J{\o}rgensen, Tollef",
editor = {Ojha, Atul Kr. and
Do{\u{g}}ru{\"o}z, A. Seza and
Tayyar Madabushi, Harish and
Da San Martino, Giovanni and
Rosenthal, Sara and
Ros{\'a}, Aiala},
booktitle = "Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.semeval-1.202",
doi = "10.18653/v1/2024.semeval-1.202",
pages = "1405--1411",
abstract = "This paper describes a system submitted to the supervised track (Track A) at SemEval-24: Semantic Textual Relatedness for African and Asian Languages. Challenged with datasets of varying sizes, some as small as 800 samples, we observe that the PEAR system, using smaller pre-trained masked language models to process sentence pairs (Pair Encoding), results in models that efficiently adapt to the task.In addition to the simplistic modeling approach, we experiment with hyperparameter optimization and data expansion from the provided training sets using multilingual bi-encoders, sampling a dynamic number of nearest neighbors (Augmented Re-sampling). The final models are lightweight, allowing fast experimentation and integration of new languages.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jorgensen-2024-pear">
<titleInfo>
<title>PEAR at SemEval-2024 Task 1: Pair Encoding with Augmented Re-sampling for Semantic Textual Relatedness</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tollef</namePart>
<namePart type="family">Jørgensen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harish</namePart>
<namePart type="family">Tayyar Madabushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giovanni</namePart>
<namePart type="family">Da San Martino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Rosenthal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aiala</namePart>
<namePart type="family">Rosá</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes a system submitted to the supervised track (Track A) at SemEval-24: Semantic Textual Relatedness for African and Asian Languages. Challenged with datasets of varying sizes, some as small as 800 samples, we observe that the PEAR system, using smaller pre-trained masked language models to process sentence pairs (Pair Encoding), results in models that efficiently adapt to the task.In addition to the simplistic modeling approach, we experiment with hyperparameter optimization and data expansion from the provided training sets using multilingual bi-encoders, sampling a dynamic number of nearest neighbors (Augmented Re-sampling). The final models are lightweight, allowing fast experimentation and integration of new languages.</abstract>
<identifier type="citekey">jorgensen-2024-pear</identifier>
<identifier type="doi">10.18653/v1/2024.semeval-1.202</identifier>
<location>
<url>https://aclanthology.org/2024.semeval-1.202</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>1405</start>
<end>1411</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T PEAR at SemEval-2024 Task 1: Pair Encoding with Augmented Re-sampling for Semantic Textual Relatedness
%A Jørgensen, Tollef
%Y Ojha, Atul Kr.
%Y Doğruöz, A. Seza
%Y Tayyar Madabushi, Harish
%Y Da San Martino, Giovanni
%Y Rosenthal, Sara
%Y Rosá, Aiala
%S Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F jorgensen-2024-pear
%X This paper describes a system submitted to the supervised track (Track A) at SemEval-24: Semantic Textual Relatedness for African and Asian Languages. Challenged with datasets of varying sizes, some as small as 800 samples, we observe that the PEAR system, using smaller pre-trained masked language models to process sentence pairs (Pair Encoding), results in models that efficiently adapt to the task.In addition to the simplistic modeling approach, we experiment with hyperparameter optimization and data expansion from the provided training sets using multilingual bi-encoders, sampling a dynamic number of nearest neighbors (Augmented Re-sampling). The final models are lightweight, allowing fast experimentation and integration of new languages.
%R 10.18653/v1/2024.semeval-1.202
%U https://aclanthology.org/2024.semeval-1.202
%U https://doi.org/10.18653/v1/2024.semeval-1.202
%P 1405-1411
Markdown (Informal)
[PEAR at SemEval-2024 Task 1: Pair Encoding with Augmented Re-sampling for Semantic Textual Relatedness](https://aclanthology.org/2024.semeval-1.202) (Jørgensen, SemEval 2024)
ACL