@inproceedings{patel-domeniconi-2024-subword,
title = "Subword Attention and Post-Processing for Rare and Unknown Contextualized Embeddings",
author = "Patel, Raj and
Domeniconi, Carlotta",
editor = "Duh, Kevin and
Gomez, Helena and
Bethard, Steven",
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2024",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-naacl.88",
doi = "10.18653/v1/2024.findings-naacl.88",
pages = "1383--1389",
abstract = "Word representations are an important aspect of Natural Language Processing (NLP). Representations are trained using large corpora, either as independent static embeddings or as part of a deep contextualized model. While word embeddings are useful, they struggle on rare and unknown words. As such, a large body of work has been done on estimating rare and unknown words. However, most of the methods focus on static embeddings, with few models focused on contextualized representations. In this work, we propose SPRUCE, a rare/unknown embedding architecture that focuses on contextualized representations. This architecture uses subword attention and embedding post-processing combined with the contextualized model to produce high quality embeddings. We then demonstrate these techniques lead to improved performance in most intrinsic and downstream tasks.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="patel-domeniconi-2024-subword">
<titleInfo>
<title>Subword Attention and Post-Processing for Rare and Unknown Contextualized Embeddings</title>
</titleInfo>
<name type="personal">
<namePart type="given">Raj</namePart>
<namePart type="family">Patel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carlotta</namePart>
<namePart type="family">Domeniconi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: NAACL 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Duh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helena</namePart>
<namePart type="family">Gomez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Word representations are an important aspect of Natural Language Processing (NLP). Representations are trained using large corpora, either as independent static embeddings or as part of a deep contextualized model. While word embeddings are useful, they struggle on rare and unknown words. As such, a large body of work has been done on estimating rare and unknown words. However, most of the methods focus on static embeddings, with few models focused on contextualized representations. In this work, we propose SPRUCE, a rare/unknown embedding architecture that focuses on contextualized representations. This architecture uses subword attention and embedding post-processing combined with the contextualized model to produce high quality embeddings. We then demonstrate these techniques lead to improved performance in most intrinsic and downstream tasks.</abstract>
<identifier type="citekey">patel-domeniconi-2024-subword</identifier>
<identifier type="doi">10.18653/v1/2024.findings-naacl.88</identifier>
<location>
<url>https://aclanthology.org/2024.findings-naacl.88</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>1383</start>
<end>1389</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Subword Attention and Post-Processing for Rare and Unknown Contextualized Embeddings
%A Patel, Raj
%A Domeniconi, Carlotta
%Y Duh, Kevin
%Y Gomez, Helena
%Y Bethard, Steven
%S Findings of the Association for Computational Linguistics: NAACL 2024
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F patel-domeniconi-2024-subword
%X Word representations are an important aspect of Natural Language Processing (NLP). Representations are trained using large corpora, either as independent static embeddings or as part of a deep contextualized model. While word embeddings are useful, they struggle on rare and unknown words. As such, a large body of work has been done on estimating rare and unknown words. However, most of the methods focus on static embeddings, with few models focused on contextualized representations. In this work, we propose SPRUCE, a rare/unknown embedding architecture that focuses on contextualized representations. This architecture uses subword attention and embedding post-processing combined with the contextualized model to produce high quality embeddings. We then demonstrate these techniques lead to improved performance in most intrinsic and downstream tasks.
%R 10.18653/v1/2024.findings-naacl.88
%U https://aclanthology.org/2024.findings-naacl.88
%U https://doi.org/10.18653/v1/2024.findings-naacl.88
%P 1383-1389
Markdown (Informal)
[Subword Attention and Post-Processing for Rare and Unknown Contextualized Embeddings](https://aclanthology.org/2024.findings-naacl.88) (Patel & Domeniconi, Findings 2024)
ACL