% Paper by the "jelenasteam" team for SemEval-2023 Task 9 (intimacy in
% multilingual tweets), pp. 638--643 of the SemEval-2023 workshop proceedings.
% Braces in the title protect words whose case must survive sentence-casing
% styles: the lowercase team name, SemEval, Task, and the MINT acronym.
% NOTE(review): address presumably records the conference venue rather than
% the publisher's city -- confirm before relocating it.
@inproceedings{lazic-vujnovic-2023-jelenasteam,
  title     = {{jelenasteam} at {SemEval}-2023 {Task} 9: Quantification of Intimacy in Multilingual Tweets using Machine Learning Algorithms: A Comparative Study on the {MINT} Dataset},
  author    = {Lazi{\'c}, Jelena and
               Vujnovi{\'c}, Sanja},
  editor    = {Ojha, Atul Kr. and
               Do{\u{g}}ru{\"o}z, A. Seza and
               Da San Martino, Giovanni and
               Tayyar Madabushi, Harish and
               Kumar, Ritesh and
               Sartori, Elisa},
  booktitle = {Proceedings of the 17th International Workshop on Semantic Evaluation ({SemEval}-2023)},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.semeval-1.87},
  doi       = {10.18653/v1/2023.semeval-1.87},
  pages     = {638--643},
  abstract  = {Intimacy is one of the fundamental aspects of our social life. It relates to intimate interactions with others, often including verbal self-disclosure. In this paper, we researched machine learning algorithms for quantification of the intimacy in the tweets. A new multilingual textual intimacy dataset named MINT was used. It contains tweets in 10 languages, including English, Spanish, French, Portuguese, Italian, and Chinese in both training and test datasets, and Dutch, Korean, Hindi, and Arabic in test data only. In the first experiment, linear regression models combined with the features and word embedding, and XLM-T deep learning model were compared. In the second experiment, cross-lingual learning between languages was tested. In the third experiment, data was clustered using K-means. The results indicate that XLM-T pre-trained embedding might be a good choice for an unsupervised learning algorithm for intimacy detection.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lazic-vujnovic-2023-jelenasteam">
<titleInfo>
<title>jelenasteam at SemEval-2023 Task 9: Quantification of Intimacy in Multilingual Tweets using Machine Learning Algorithms: A Comparative Study on the MINT Dataset</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jelena</namePart>
<namePart type="family">Lazić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sanja</namePart>
<namePart type="family">Vujnović</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giovanni</namePart>
<namePart type="family">Da San Martino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harish</namePart>
<namePart type="family">Tayyar Madabushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ritesh</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elisa</namePart>
<namePart type="family">Sartori</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Intimacy is one of the fundamental aspects of our social life. It relates to intimate interactions with others, often including verbal self-disclosure. In this paper, we researched machine learning algorithms for quantification of the intimacy in the tweets. A new multilingual textual intimacy dataset named MINT was used. It contains tweets in 10 languages, including English, Spanish, French, Portuguese, Italian, and Chinese in both training and test datasets, and Dutch, Korean, Hindi, and Arabic in test data only. In the first experiment, linear regression models combined with the features and word embedding, and XLM-T deep learning model were compared. In the second experiment, cross-lingual learning between languages was tested. In the third experiment, data was clustered using K-means. The results indicate that XLM-T pre-trained embedding might be a good choice for an unsupervised learning algorithm for intimacy detection.</abstract>
<identifier type="citekey">lazic-vujnovic-2023-jelenasteam</identifier>
<identifier type="doi">10.18653/v1/2023.semeval-1.87</identifier>
<location>
<url>https://aclanthology.org/2023.semeval-1.87</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>638</start>
<end>643</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T jelenasteam at SemEval-2023 Task 9: Quantification of Intimacy in Multilingual Tweets using Machine Learning Algorithms: A Comparative Study on the MINT Dataset
%A Lazić, Jelena
%A Vujnović, Sanja
%Y Ojha, Atul Kr.
%Y Doğruöz, A. Seza
%Y Da San Martino, Giovanni
%Y Tayyar Madabushi, Harish
%Y Kumar, Ritesh
%Y Sartori, Elisa
%S Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F lazic-vujnovic-2023-jelenasteam
%X Intimacy is one of the fundamental aspects of our social life. It relates to intimate interactions with others, often including verbal self-disclosure. In this paper, we researched machine learning algorithms for quantification of the intimacy in the tweets. A new multilingual textual intimacy dataset named MINT was used. It contains tweets in 10 languages, including English, Spanish, French, Portuguese, Italian, and Chinese in both training and test datasets, and Dutch, Korean, Hindi, and Arabic in test data only. In the first experiment, linear regression models combined with the features and word embedding, and XLM-T deep learning model were compared. In the second experiment, cross-lingual learning between languages was tested. In the third experiment, data was clustered using K-means. The results indicate that XLM-T pre-trained embedding might be a good choice for an unsupervised learning algorithm for intimacy detection.
%R 10.18653/v1/2023.semeval-1.87
%U https://aclanthology.org/2023.semeval-1.87
%U https://doi.org/10.18653/v1/2023.semeval-1.87
%P 638-643
Markdown (Informal)
[jelenasteam at SemEval-2023 Task 9: Quantification of Intimacy in Multilingual Tweets using Machine Learning Algorithms: A Comparative Study on the MINT Dataset](https://aclanthology.org/2023.semeval-1.87) (Lazić & Vujnović, SemEval 2023)
ACL