@inproceedings{opper-narayanaswamy-2024-self,
title = "Self-{S}tr{AE} at {S}em{E}val-2024 Task 1: Making Self-Structuring {A}uto{E}ncoders Learn More With Less",
author = "Opper, Mattia and
Narayanaswamy, Siddharth",
editor = {Ojha, Atul Kr. and
Do{\u{g}}ru{\"o}z, A. Seza and
Tayyar Madabushi, Harish and
Da San Martino, Giovanni and
Rosenthal, Sara and
Ros{\'a}, Aiala},
booktitle = "Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.semeval-1.18",
doi = "10.18653/v1/2024.semeval-1.18",
pages = "108--115",
abstract = "We present two simple improvements to the Self-Structuring AutoEncoder (Self-StrAE). Firstly, we show that including reconstruction to the vocabulary as an auxiliary objective improves representation quality. Secondly, we demonstrate that increasing the number of independent channels leads to significant improvements in embedding quality, while simultaneously reducing the number of parameters. Surprisingly, we demonstrate that this trend can be followed to the extreme, even to the point of reducing the total number of non-embedding parameters to seven. Our system can be pre-trained from scratch with as little as 10M tokens of input data, and proves effective across English, Spanish and Afrikaans.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="opper-narayanaswamy-2024-self">
<titleInfo>
<title>Self-StrAE at SemEval-2024 Task 1: Making Self-Structuring AutoEncoders Learn More With Less</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mattia</namePart>
<namePart type="family">Opper</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Siddharth</namePart>
<namePart type="family">Narayanaswamy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A.</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harish</namePart>
<namePart type="family">Tayyar Madabushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giovanni</namePart>
<namePart type="family">Da San Martino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Rosenthal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aiala</namePart>
<namePart type="family">Rosá</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present two simple improvements to the Self-Structuring AutoEncoder (Self-StrAE). Firstly, we show that including reconstruction to the vocabulary as an auxiliary objective improves representation quality. Secondly, we demonstrate that increasing the number of independent channels leads to significant improvements in embedding quality, while simultaneously reducing the number of parameters. Surprisingly, we demonstrate that this trend can be followed to the extreme, even to the point of reducing the total number of non-embedding parameters to seven. Our system can be pre-trained from scratch with as little as 10M tokens of input data, and proves effective across English, Spanish and Afrikaans.</abstract>
<identifier type="citekey">opper-narayanaswamy-2024-self</identifier>
<identifier type="doi">10.18653/v1/2024.semeval-1.18</identifier>
<location>
<url>https://aclanthology.org/2024.semeval-1.18</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>108</start>
<end>115</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Self-StrAE at SemEval-2024 Task 1: Making Self-Structuring AutoEncoders Learn More With Less
%A Opper, Mattia
%A Narayanaswamy, Siddharth
%Y Ojha, Atul Kr.
%Y Doğruöz, A. Seza
%Y Tayyar Madabushi, Harish
%Y Da San Martino, Giovanni
%Y Rosenthal, Sara
%Y Rosá, Aiala
%S Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F opper-narayanaswamy-2024-self
%X We present two simple improvements to the Self-Structuring AutoEncoder (Self-StrAE). Firstly, we show that including reconstruction to the vocabulary as an auxiliary objective improves representation quality. Secondly, we demonstrate that increasing the number of independent channels leads to significant improvements in embedding quality, while simultaneously reducing the number of parameters. Surprisingly, we demonstrate that this trend can be followed to the extreme, even to the point of reducing the total number of non-embedding parameters to seven. Our system can be pre-trained from scratch with as little as 10M tokens of input data, and proves effective across English, Spanish and Afrikaans.
%R 10.18653/v1/2024.semeval-1.18
%U https://aclanthology.org/2024.semeval-1.18
%U https://doi.org/10.18653/v1/2024.semeval-1.18
%P 108-115
Markdown (Informal)
[Self-StrAE at SemEval-2024 Task 1: Making Self-Structuring AutoEncoders Learn More With Less](https://aclanthology.org/2024.semeval-1.18) (Opper & Narayanaswamy, SemEval 2024)
ACL