@inproceedings{patel-domeniconi-2023-enhancing,
title = "Enhancing Out-of-Vocabulary Estimation with Subword Attention",
author = "Patel, Raj and
Domeniconi, Carlotta",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-acl.221",
doi = "10.18653/v1/2023.findings-acl.221",
pages = "3592--3601",
abstract = "Word embedding methods like word2vec and GloVe have been shown to learn strong representations of words. However, these methods only learn representations for words in the training corpus and therefore struggle to handle unknown and new words, known as out-of-vocabulary (OOV) words. As a result, there have been multiple attempts to learn OOV word representations in a similar fashion to how humans learn new words, using word roots/subwords and/or surrounding words. However, while most of these approaches use advanced architectures like attention on the context of the OOV word, they tend to use simple structures like ngram addition or character based convolutional neural networks (CNN) to handle processing subword information. In response to this, we propose SubAtt, a transformer based OOV estimation model that uses attention mechanisms on both the context and the subwords. In addition to attention, we also show that pretraining subword representations also leads to improvement in OOV estimation. We show SubAtt outperforms current state-of-the-art OOV estimation models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="patel-domeniconi-2023-enhancing">
    <titleInfo>
      <title>Enhancing Out-of-Vocabulary Estimation with Subword Attention</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Raj</namePart>
      <namePart type="family">Patel</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Carlotta</namePart>
      <namePart type="family">Domeniconi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2023</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Anna</namePart>
        <namePart type="family">Rogers</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jordan</namePart>
        <namePart type="family">Boyd-Graber</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Naoaki</namePart>
        <namePart type="family">Okazaki</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Toronto, Canada</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Word embedding methods like word2vec and GloVe have been shown to learn strong representations of words. However, these methods only learn representations for words in the training corpus and therefore struggle to handle unknown and new words, known as out-of-vocabulary (OOV) words. As a result, there have been multiple attempts to learn OOV word representations in a similar fashion to how humans learn new words, using word roots/subwords and/or surrounding words. However, while most of these approaches use advanced architectures like attention on the context of the OOV word, they tend to use simple structures like n-gram addition or character-based convolutional neural networks (CNNs) to process subword information. In response to this, we propose SubAtt, a transformer-based OOV estimation model that uses attention mechanisms on both the context and the subwords. In addition to attention, we also show that pretraining subword representations leads to improvement in OOV estimation. We show that SubAtt outperforms current state-of-the-art OOV estimation models.</abstract>
<identifier type="citekey">patel-domeniconi-2023-enhancing</identifier>
<identifier type="doi">10.18653/v1/2023.findings-acl.221</identifier>
<location>
<url>https://aclanthology.org/2023.findings-acl.221</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>3592</start>
<end>3601</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Enhancing Out-of-Vocabulary Estimation with Subword Attention
%A Patel, Raj
%A Domeniconi, Carlotta
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Findings of the Association for Computational Linguistics: ACL 2023
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F patel-domeniconi-2023-enhancing
%X Word embedding methods like word2vec and GloVe have been shown to learn strong representations of words. However, these methods only learn representations for words in the training corpus and therefore struggle to handle unknown and new words, known as out-of-vocabulary (OOV) words. As a result, there have been multiple attempts to learn OOV word representations in a similar fashion to how humans learn new words, using word roots/subwords and/or surrounding words. However, while most of these approaches use advanced architectures like attention on the context of the OOV word, they tend to use simple structures like n-gram addition or character-based convolutional neural networks (CNNs) to process subword information. In response to this, we propose SubAtt, a transformer-based OOV estimation model that uses attention mechanisms on both the context and the subwords. In addition to attention, we also show that pretraining subword representations leads to improvement in OOV estimation. We show that SubAtt outperforms current state-of-the-art OOV estimation models.
%R 10.18653/v1/2023.findings-acl.221
%U https://aclanthology.org/2023.findings-acl.221
%U https://doi.org/10.18653/v1/2023.findings-acl.221
%P 3592-3601
Markdown (Informal)
[Enhancing Out-of-Vocabulary Estimation with Subword Attention](https://aclanthology.org/2023.findings-acl.221) (Patel & Domeniconi, Findings 2023)
ACL
Raj Patel and Carlotta Domeniconi. 2023. [Enhancing Out-of-Vocabulary Estimation with Subword Attention](https://aclanthology.org/2023.findings-acl.221). In *Findings of the Association for Computational Linguistics: ACL 2023*, pages 3592–3601, Toronto, Canada. Association for Computational Linguistics.