@inproceedings{johannsson-etal-2024-evaluating,
title = "Evaluating {I}celandic Sentiment Analysis Models Trained on Translated Data",
author = {J{\'o}hannsson, {\'O}lafur A. and
Arndal, Birkir H. and
J{\'o}nsson, Eysteinn {\"O}. and
Olafsson, Stefan and
Loftsson, Hrafn},
editor = "Melero, Maite and
Sakti, Sakriani and
Soria, Claudia",
booktitle = "Proceedings of the 3rd Annual Meeting of the Special Interest Group on Under-resourced Languages @ LREC-COLING 2024",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.sigul-1.11",
pages = "79--89",
abstract = "We experiment with sentiment classification models for Icelandic that leverage machine-translated data for training. Since no large sentiment dataset exists for Icelandic, we translate 50,000 English IMDb reviews, classified either as positive or negative, into Icelandic using two services: Google Translate and GreynirTranslate. After machine translation, we assess whether the sentiment of the source language text is retained in the target language. Moreover, we evaluate the accuracy of the sentiment classifiers on non-translated Icelandic text.The performance of three types of baseline classifiers is compared, i.e., Support Vector Machines, Logistic Regression and Naive Bayes, when trained on translated data generated by either translation service. Furthermore, we fine-tune and evaluate three pre-trained transformer-based models, RoBERTa, IceBERT and ELECTRA, on both the original English texts and the translated texts. Our results indicate that the transformer models perform better than the baseline classifiers on all datasets. Moreover, our evaluation shows that the transformer models trained on data translated from English reviews can be used to effectively classify sentiment on non-translated Icelandic movie reviews.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="johannsson-etal-2024-evaluating">
<titleInfo>
<title>Evaluating Icelandic Sentiment Analysis Models Trained on Translated Data</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ólafur</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Jóhannsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Birkir</namePart>
<namePart type="given">H</namePart>
<namePart type="family">Arndal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eysteinn</namePart>
<namePart type="given">Ö.</namePart>
<namePart type="family">Jónsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stefan</namePart>
<namePart type="family">Olafsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hrafn</namePart>
<namePart type="family">Loftsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 3rd Annual Meeting of the Special Interest Group on Under-resourced Languages @ LREC-COLING 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maite</namePart>
<namePart type="family">Melero</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claudia</namePart>
<namePart type="family">Soria</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We experiment with sentiment classification models for Icelandic that leverage machine-translated data for training. Since no large sentiment dataset exists for Icelandic, we translate 50,000 English IMDb reviews, classified either as positive or negative, into Icelandic using two services: Google Translate and GreynirTranslate. After machine translation, we assess whether the sentiment of the source language text is retained in the target language. Moreover, we evaluate the accuracy of the sentiment classifiers on non-translated Icelandic text.The performance of three types of baseline classifiers is compared, i.e., Support Vector Machines, Logistic Regression and Naive Bayes, when trained on translated data generated by either translation service. Furthermore, we fine-tune and evaluate three pre-trained transformer-based models, RoBERTa, IceBERT and ELECTRA, on both the original English texts and the translated texts. Our results indicate that the transformer models perform better than the baseline classifiers on all datasets. Moreover, our evaluation shows that the transformer models trained on data translated from English reviews can be used to effectively classify sentiment on non-translated Icelandic movie reviews.</abstract>
<identifier type="citekey">johannsson-etal-2024-evaluating</identifier>
<location>
<url>https://aclanthology.org/2024.sigul-1.11</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>79</start>
<end>89</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluating Icelandic Sentiment Analysis Models Trained on Translated Data
%A Jóhannsson, Ólafur A.
%A Arndal, Birkir H.
%A Jónsson, Eysteinn Ö.
%A Olafsson, Stefan
%A Loftsson, Hrafn
%Y Melero, Maite
%Y Sakti, Sakriani
%Y Soria, Claudia
%S Proceedings of the 3rd Annual Meeting of the Special Interest Group on Under-resourced Languages @ LREC-COLING 2024
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F johannsson-etal-2024-evaluating
%X We experiment with sentiment classification models for Icelandic that leverage machine-translated data for training. Since no large sentiment dataset exists for Icelandic, we translate 50,000 English IMDb reviews, classified either as positive or negative, into Icelandic using two services: Google Translate and GreynirTranslate. After machine translation, we assess whether the sentiment of the source language text is retained in the target language. Moreover, we evaluate the accuracy of the sentiment classifiers on non-translated Icelandic text.The performance of three types of baseline classifiers is compared, i.e., Support Vector Machines, Logistic Regression and Naive Bayes, when trained on translated data generated by either translation service. Furthermore, we fine-tune and evaluate three pre-trained transformer-based models, RoBERTa, IceBERT and ELECTRA, on both the original English texts and the translated texts. Our results indicate that the transformer models perform better than the baseline classifiers on all datasets. Moreover, our evaluation shows that the transformer models trained on data translated from English reviews can be used to effectively classify sentiment on non-translated Icelandic movie reviews.
%U https://aclanthology.org/2024.sigul-1.11
%P 79-89
Markdown (Informal)
[Evaluating Icelandic Sentiment Analysis Models Trained on Translated Data](https://aclanthology.org/2024.sigul-1.11) (Jóhannsson et al., SIGUL-WS 2024)
ACL