@inproceedings{coster-etal-2018-hatching,
title = "Hatching Chick at {S}em{E}val-2018 Task 2: Multilingual Emoji Prediction",
author = {Coster, Jo{\"e}l and
van Dalen, Reinder Gerard and
Stierman, Nathalie Adri{\"e}nne Jacqueline},
editor = "Apidianaki, Marianna and
Mohammad, Saif M. and
May, Jonathan and
Shutova, Ekaterina and
Bethard, Steven and
Carpuat, Marine",
booktitle = "Proceedings of the 12th International Workshop on Semantic Evaluation",
month = jun,
year = "2018",
address = "New Orleans, Louisiana",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/S18-1070",
doi = "10.18653/v1/S18-1070",
pages = "445--448",
abstract = "As part of a SemEval 2018 shared task an attempt was made to build a system capable of predicting the occurrence of a language{'}s most frequently used emoji in Tweets. Specifically, models for English and Spanish data were created and trained on 500,000 and 100,000 tweets respectively. In order to create these models, first a logistic regressor, a sequential LSTM, a random forest regressor and an SVM were tested. The latter was found to perform best and therefore optimized individually for both languages. During development, f1-scores of 61 and 82 were obtained for English and Spanish data respectively; in comparison, f1-scores on the official evaluation data were 21 and 18. The significant decrease in performance during evaluation might be explained by overfitting during development and might therefore have partially been prevented by using cross-validation. Overall, emoji which occur in a very specific context such as a Christmas tree were found to be most predictable.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="coster-etal-2018-hatching">
<titleInfo>
<title>Hatching Chick at SemEval-2018 Task 2: Multilingual Emoji Prediction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Joël</namePart>
<namePart type="family">Coster</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reinder</namePart>
<namePart type="given">Gerard</namePart>
<namePart type="family">van Dalen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nathalie</namePart>
<namePart type="given">Adriënne</namePart>
<namePart type="given">Jacqueline</namePart>
<namePart type="family">Stierman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th International Workshop on Semantic Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Apidianaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saif</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Mohammad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="family">May</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marine</namePart>
<namePart type="family">Carpuat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">New Orleans, Louisiana</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>As part of a SemEval 2018 shared task an attempt was made to build a system capable of predicting the occurrence of a language’s most frequently used emoji in Tweets. Specifically, models for English and Spanish data were created and trained on 500,000 and 100,000 tweets respectively. In order to create these models, first a logistic regressor, a sequential LSTM, a random forest regressor and an SVM were tested. The latter was found to perform best and therefore optimized individually for both languages. During development, f1-scores of 61 and 82 were obtained for English and Spanish data respectively; in comparison, f1-scores on the official evaluation data were 21 and 18. The significant decrease in performance during evaluation might be explained by overfitting during development and might therefore have partially been prevented by using cross-validation. Overall, emoji which occur in a very specific context such as a Christmas tree were found to be most predictable.</abstract>
<identifier type="citekey">coster-etal-2018-hatching</identifier>
<identifier type="doi">10.18653/v1/S18-1070</identifier>
<location>
<url>https://aclanthology.org/S18-1070</url>
</location>
<part>
<date>2018-06</date>
<extent unit="page">
<start>445</start>
<end>448</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Hatching Chick at SemEval-2018 Task 2: Multilingual Emoji Prediction
%A Coster, Joël
%A van Dalen, Reinder Gerard
%A Stierman, Nathalie Adriënne Jacqueline
%Y Apidianaki, Marianna
%Y Mohammad, Saif M.
%Y May, Jonathan
%Y Shutova, Ekaterina
%Y Bethard, Steven
%Y Carpuat, Marine
%S Proceedings of the 12th International Workshop on Semantic Evaluation
%D 2018
%8 June
%I Association for Computational Linguistics
%C New Orleans, Louisiana
%F coster-etal-2018-hatching
%X As part of a SemEval 2018 shared task an attempt was made to build a system capable of predicting the occurrence of a language’s most frequently used emoji in Tweets. Specifically, models for English and Spanish data were created and trained on 500,000 and 100,000 tweets respectively. In order to create these models, first a logistic regressor, a sequential LSTM, a random forest regressor and an SVM were tested. The latter was found to perform best and therefore optimized individually for both languages. During development, f1-scores of 61 and 82 were obtained for English and Spanish data respectively; in comparison, f1-scores on the official evaluation data were 21 and 18. The significant decrease in performance during evaluation might be explained by overfitting during development and might therefore have partially been prevented by using cross-validation. Overall, emoji which occur in a very specific context such as a Christmas tree were found to be most predictable.
%R 10.18653/v1/S18-1070
%U https://aclanthology.org/S18-1070
%U https://doi.org/10.18653/v1/S18-1070
%P 445-448
Markdown (Informal)
[Hatching Chick at SemEval-2018 Task 2: Multilingual Emoji Prediction](https://aclanthology.org/S18-1070) (Coster et al., SemEval 2018)
ACL