@inproceedings{van-hautte-etal-2019-bad,
title = "Bad Form: Comparing Context-Based and Form-Based Few-Shot Learning in Distributional Semantic Models",
author = "Van Hautte, Jeroen and
Emerson, Guy and
Rei, Marek",
editor = "Cherry, Colin and
Durrett, Greg and
Foster, George and
Haffari, Reza and
Khadivi, Shahram and
Peng, Nanyun and
Ren, Xiang and
Swayamdipta, Swabha",
booktitle = "Proceedings of the 2nd Workshop on Deep Learning Approaches for Low-Resource NLP (DeepLo 2019)",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D19-6104/",
doi = "10.18653/v1/D19-6104",
pages = "31--39",
abstract = "Word embeddings are an essential component in a wide range of natural language processing applications. However, distributional semantic models are known to struggle when only a small number of context sentences are available. Several methods have been proposed to obtain higher-quality vectors for these words, leveraging both this context information and sometimes the word forms themselves through a hybrid approach. We show that the current tasks do not suffice to evaluate models that use word-form information, as such models can easily leverage word forms in the training data that are related to word forms in the test data. We introduce 3 new tasks, allowing for a more balanced comparison between models. Furthermore, we show that hyperparameters that have largely been ignored in previous work can consistently improve the performance of both baseline and advanced models, achieving a new state of the art on 4 out of 6 tasks."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="van-hautte-etal-2019-bad">
<titleInfo>
<title>Bad Form: Comparing Context-Based and Form-Based Few-Shot Learning in Distributional Semantic Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jeroen</namePart>
<namePart type="family">Van Hautte</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guy</namePart>
<namePart type="family">Emerson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marek</namePart>
<namePart type="family">Rei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on Deep Learning Approaches for Low-Resource NLP (DeepLo 2019)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Colin</namePart>
<namePart type="family">Cherry</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Greg</namePart>
<namePart type="family">Durrett</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">George</namePart>
<namePart type="family">Foster</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reza</namePart>
<namePart type="family">Haffari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shahram</namePart>
<namePart type="family">Khadivi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nanyun</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiang</namePart>
<namePart type="family">Ren</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Swabha</namePart>
<namePart type="family">Swayamdipta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Word embeddings are an essential component in a wide range of natural language processing applications. However, distributional semantic models are known to struggle when only a small number of context sentences are available. Several methods have been proposed to obtain higher-quality vectors for these words, leveraging both this context information and sometimes the word forms themselves through a hybrid approach. We show that the current tasks do not suffice to evaluate models that use word-form information, as such models can easily leverage word forms in the training data that are related to word forms in the test data. We introduce 3 new tasks, allowing for a more balanced comparison between models. Furthermore, we show that hyperparameters that have largely been ignored in previous work can consistently improve the performance of both baseline and advanced models, achieving a new state of the art on 4 out of 6 tasks.</abstract>
<identifier type="citekey">van-hautte-etal-2019-bad</identifier>
<identifier type="doi">10.18653/v1/D19-6104</identifier>
<location>
<url>https://aclanthology.org/D19-6104/</url>
</location>
<part>
<date>2019-11</date>
<extent unit="page">
<start>31</start>
<end>39</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Bad Form: Comparing Context-Based and Form-Based Few-Shot Learning in Distributional Semantic Models
%A Van Hautte, Jeroen
%A Emerson, Guy
%A Rei, Marek
%Y Cherry, Colin
%Y Durrett, Greg
%Y Foster, George
%Y Haffari, Reza
%Y Khadivi, Shahram
%Y Peng, Nanyun
%Y Ren, Xiang
%Y Swayamdipta, Swabha
%S Proceedings of the 2nd Workshop on Deep Learning Approaches for Low-Resource NLP (DeepLo 2019)
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong, China
%F van-hautte-etal-2019-bad
%X Word embeddings are an essential component in a wide range of natural language processing applications. However, distributional semantic models are known to struggle when only a small number of context sentences are available. Several methods have been proposed to obtain higher-quality vectors for these words, leveraging both this context information and sometimes the word forms themselves through a hybrid approach. We show that the current tasks do not suffice to evaluate models that use word-form information, as such models can easily leverage word forms in the training data that are related to word forms in the test data. We introduce 3 new tasks, allowing for a more balanced comparison between models. Furthermore, we show that hyperparameters that have largely been ignored in previous work can consistently improve the performance of both baseline and advanced models, achieving a new state of the art on 4 out of 6 tasks.
%R 10.18653/v1/D19-6104
%U https://aclanthology.org/D19-6104/
%U https://doi.org/10.18653/v1/D19-6104
%P 31-39
Markdown (Informal)
[Bad Form: Comparing Context-Based and Form-Based Few-Shot Learning in Distributional Semantic Models](https://aclanthology.org/D19-6104/) (Van Hautte et al., 2019)
ACL