@inproceedings{buyukoz-etal-2020-analyzing,
title = "Analyzing {ELMo} and {DistilBERT} on Socio-political News Classification",
author = {B{\"u}y{\"u}k{\"o}z, Berfu and
H{\"u}rriyeto{\u{g}}lu, Ali and
{\"O}zg{\"u}r, Arzucan},
editor = {H{\"u}rriyeto{\u{g}}lu, Ali and
Y{\"o}r{\"u}k, Erdem and
Zavarella, Vanni and
Tanev, Hristo},
booktitle = "Proceedings of the Workshop on Automated Extraction of Socio-political Events from News 2020",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/2020.aespen-1.4",
pages = "9--18",
abstract = "This study evaluates the robustness of two state-of-the-art deep contextual language representations, ELMo and DistilBERT, on supervised learning of binary protest news classification (PC) and sentiment analysis (SA) of product reviews. A {``}cross-context{''} setting is enabled using test sets that are distinct from the training data. The models are fine-tuned and fed into a Feed-Forward Neural Network (FFNN) and a Bidirectional Long Short Term Memory network (BiLSTM). Multinomial Naive Bayes (MNB) and Linear Support Vector Machine (LSVM) are used as traditional baselines. The results suggest that DistilBERT can transfer generic semantic knowledge to other domains better than ELMo. DistilBERT is also 30{\%} smaller and 83{\%} faster than ELMo, which suggests superiority for smaller computational training budgets. When generalization is not the utmost preference and test domain is similar to the training domain, the traditional machine learning (ML) algorithms can still be considered as more economic alternatives to deep language representations.",
language = "English",
isbn = "979-10-95546-50-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="buyukoz-etal-2020-analyzing">
<titleInfo>
<title>Analyzing ELMo and DistilBERT on Socio-political News Classification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Berfu</namePart>
<namePart type="family">Büyüköz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ali</namePart>
<namePart type="family">Hürriyetoğlu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arzucan</namePart>
<namePart type="family">Özgür</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Automated Extraction of Socio-political Events from News 2020</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ali</namePart>
<namePart type="family">Hürriyetoğlu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Erdem</namePart>
<namePart type="family">Yörük</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vanni</namePart>
<namePart type="family">Zavarella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hristo</namePart>
<namePart type="family">Tanev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-50-4</identifier>
</relatedItem>
<abstract>This study evaluates the robustness of two state-of-the-art deep contextual language representations, ELMo and DistilBERT, on supervised learning of binary protest news classification (PC) and sentiment analysis (SA) of product reviews. A “cross-context” setting is enabled using test sets that are distinct from the training data. The models are fine-tuned and fed into a Feed-Forward Neural Network (FFNN) and a Bidirectional Long Short Term Memory network (BiLSTM). Multinomial Naive Bayes (MNB) and Linear Support Vector Machine (LSVM) are used as traditional baselines. The results suggest that DistilBERT can transfer generic semantic knowledge to other domains better than ELMo. DistilBERT is also 30% smaller and 83% faster than ELMo, which suggests superiority for smaller computational training budgets. When generalization is not the utmost preference and test domain is similar to the training domain, the traditional machine learning (ML) algorithms can still be considered as more economic alternatives to deep language representations.</abstract>
<identifier type="citekey">buyukoz-etal-2020-analyzing</identifier>
<location>
<url>https://aclanthology.org/2020.aespen-1.4</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>9</start>
<end>18</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Analyzing ELMo and DistilBERT on Socio-political News Classification
%A Büyüköz, Berfu
%A Hürriyetoğlu, Ali
%A Özgür, Arzucan
%Y Hürriyetoğlu, Ali
%Y Yörük, Erdem
%Y Zavarella, Vanni
%Y Tanev, Hristo
%S Proceedings of the Workshop on Automated Extraction of Socio-political Events from News 2020
%D 2020
%8 May
%I European Language Resources Association (ELRA)
%C Marseille, France
%@ 979-10-95546-50-4
%G English
%F buyukoz-etal-2020-analyzing
%X This study evaluates the robustness of two state-of-the-art deep contextual language representations, ELMo and DistilBERT, on supervised learning of binary protest news classification (PC) and sentiment analysis (SA) of product reviews. A “cross-context” setting is enabled using test sets that are distinct from the training data. The models are fine-tuned and fed into a Feed-Forward Neural Network (FFNN) and a Bidirectional Long Short Term Memory network (BiLSTM). Multinomial Naive Bayes (MNB) and Linear Support Vector Machine (LSVM) are used as traditional baselines. The results suggest that DistilBERT can transfer generic semantic knowledge to other domains better than ELMo. DistilBERT is also 30% smaller and 83% faster than ELMo, which suggests superiority for smaller computational training budgets. When generalization is not the utmost preference and test domain is similar to the training domain, the traditional machine learning (ML) algorithms can still be considered as more economic alternatives to deep language representations.
%U https://aclanthology.org/2020.aespen-1.4
%P 9-18
Markdown (Informal)
[Analyzing ELMo and DistilBERT on Socio-political News Classification](https://aclanthology.org/2020.aespen-1.4) (Büyüköz et al., AESPEN 2020)
ACL