@inproceedings{brink-andersen-etal-2020-one,
title = "One of these words is not like the other: a reproduction of outlier identification using non-contextual word representations",
author = "Brink Andersen, Jesper and
Bak Bertelsen, Mikkel and
H{\o}rby Schou, Mikkel and
Ciosici, Manuel R. and
Assent, Ira",
booktitle = "Proceedings of the First Workshop on Evaluation and Comparison of NLP Systems",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.eval4nlp-1.12",
doi = "10.18653/v1/2020.eval4nlp-1.12",
pages = "120--130",
abstract = "Word embeddings are an active topic in the NLP research community. State-of-the-art neural models achieve high performance on downstream tasks, albeit at the cost of computationally expensive training. Cost aware solutions require cheaper models that still achieve good performance. We present several reproduction studies of intrinsic evaluation tasks that evaluate non-contextual word representations in multiple languages. Furthermore, we present 50-8-8, a new data set for the outlier identification task, which avoids limitations of the original data set, such as ambiguous words, infrequent words, and multi-word tokens, while increasing the number of test cases. The data set is expanded to contain semantic and syntactic tests and is multilingual (English, German, and Italian). We provide an in-depth analysis of word embedding models with a range of hyper-parameters. Our analysis shows the suitability of different models and hyper-parameters for different tasks and the greater difficulty of representing German and Italian languages.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="brink-andersen-etal-2020-one">
<titleInfo>
<title>One of these words is not like the other: a reproduction of outlier identification using non-contextual word representations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jesper</namePart>
<namePart type="family">Brink Andersen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mikkel</namePart>
<namePart type="family">Bak Bertelsen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mikkel</namePart>
<namePart type="family">Hørby Schou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manuel</namePart>
<namePart type="given">R</namePart>
<namePart type="family">Ciosici</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ira</namePart>
<namePart type="family">Assent</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Evaluation and Comparison of NLP Systems</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Word embeddings are an active topic in the NLP research community. State-of-the-art neural models achieve high performance on downstream tasks, albeit at the cost of computationally expensive training. Cost aware solutions require cheaper models that still achieve good performance. We present several reproduction studies of intrinsic evaluation tasks that evaluate non-contextual word representations in multiple languages. Furthermore, we present 50-8-8, a new data set for the outlier identification task, which avoids limitations of the original data set, such as ambiguous words, infrequent words, and multi-word tokens, while increasing the number of test cases. The data set is expanded to contain semantic and syntactic tests and is multilingual (English, German, and Italian). We provide an in-depth analysis of word embedding models with a range of hyper-parameters. Our analysis shows the suitability of different models and hyper-parameters for different tasks and the greater difficulty of representing German and Italian languages.</abstract>
<identifier type="citekey">brink-andersen-etal-2020-one</identifier>
<identifier type="doi">10.18653/v1/2020.eval4nlp-1.12</identifier>
<location>
<url>https://aclanthology.org/2020.eval4nlp-1.12</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>120</start>
<end>130</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T One of these words is not like the other: a reproduction of outlier identification using non-contextual word representations
%A Brink Andersen, Jesper
%A Bak Bertelsen, Mikkel
%A Hørby Schou, Mikkel
%A Ciosici, Manuel R.
%A Assent, Ira
%S Proceedings of the First Workshop on Evaluation and Comparison of NLP Systems
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F brink-andersen-etal-2020-one
%X Word embeddings are an active topic in the NLP research community. State-of-the-art neural models achieve high performance on downstream tasks, albeit at the cost of computationally expensive training. Cost aware solutions require cheaper models that still achieve good performance. We present several reproduction studies of intrinsic evaluation tasks that evaluate non-contextual word representations in multiple languages. Furthermore, we present 50-8-8, a new data set for the outlier identification task, which avoids limitations of the original data set, such as ambiguous words, infrequent words, and multi-word tokens, while increasing the number of test cases. The data set is expanded to contain semantic and syntactic tests and is multilingual (English, German, and Italian). We provide an in-depth analysis of word embedding models with a range of hyper-parameters. Our analysis shows the suitability of different models and hyper-parameters for different tasks and the greater difficulty of representing German and Italian languages.
%R 10.18653/v1/2020.eval4nlp-1.12
%U https://aclanthology.org/2020.eval4nlp-1.12
%U https://doi.org/10.18653/v1/2020.eval4nlp-1.12
%P 120-130
Markdown (Informal)
[One of these words is not like the other: a reproduction of outlier identification using non-contextual word representations](https://aclanthology.org/2020.eval4nlp-1.12) (Brink Andersen et al., Eval4NLP 2020)
ACL