@inproceedings{michail-etal-2023-uzh,
title = "{UZH}{\_}{CL}yp at {S}em{E}val-2023 Task 9: Head-First Fine-Tuning and {C}hat{GPT} Data Generation for Cross-Lingual Learning in Tweet Intimacy Prediction",
author = "Michail, Andrianos and
Konstantinou, Stefanos and
Clematide, Simon",
editor = {Ojha, Atul Kr. and
Do{\u{g}}ru{\"o}z, A. Seza and
Da San Martino, Giovanni and
Tayyar Madabushi, Harish and
Kumar, Ritesh and
Sartori, Elisa},
booktitle = "Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.semeval-1.140",
doi = "10.18653/v1/2023.semeval-1.140",
pages = "1021--1029",
abstract = "This paper describes the submission of UZH{\_}CLyp for the SemEval 2023 Task 9 {``}Multilingual Tweet Intimacy Analysis. We achieved second-best results in all 10 languages according to the official Pearson{'}s correlation regression evaluation measure. Our cross-lingual transfer learning approach explores the benefits of using a Head-First Fine-Tuning method (HeFiT) that first updates only the regression head parameters and then also updates the pre-trained transformer encoder parameters at a reduced learning rate. Additionally, we study the impact of using a small set of automatically generated examples (in our case, from ChatGPT) for low-resource settings where no human-labeled data is available. Our study shows that HeFiT stabilizes training and consistently improves results for pre-trained models that lack domain adaptation to tweets. Our study also shows a noticeable performance increase in cross-lingual learning when synthetic data is used, confirming the usefulness of current text generation systems to improve zeroshot baseline results. Finally, we examine how possible inconsistencies in the annotated data contribute to cross-lingual interference issues.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="michail-etal-2023-uzh">
<titleInfo>
<title>UZH_CLyp at SemEval-2023 Task 9: Head-First Fine-Tuning and ChatGPT Data Generation for Cross-Lingual Learning in Tweet Intimacy Prediction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Andrianos</namePart>
<namePart type="family">Michail</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stefanos</namePart>
<namePart type="family">Konstantinou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Clematide</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giovanni</namePart>
<namePart type="family">Da San Martino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harish</namePart>
<namePart type="family">Tayyar Madabushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ritesh</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elisa</namePart>
<namePart type="family">Sartori</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes the submission of UZH_CLyp for the SemEval 2023 Task 9 “Multilingual Tweet Intimacy Analysis. We achieved second-best results in all 10 languages according to the official Pearson’s correlation regression evaluation measure. Our cross-lingual transfer learning approach explores the benefits of using a Head-First Fine-Tuning method (HeFiT) that first updates only the regression head parameters and then also updates the pre-trained transformer encoder parameters at a reduced learning rate. Additionally, we study the impact of using a small set of automatically generated examples (in our case, from ChatGPT) for low-resource settings where no human-labeled data is available. Our study shows that HeFiT stabilizes training and consistently improves results for pre-trained models that lack domain adaptation to tweets. Our study also shows a noticeable performance increase in cross-lingual learning when synthetic data is used, confirming the usefulness of current text generation systems to improve zeroshot baseline results. Finally, we examine how possible inconsistencies in the annotated data contribute to cross-lingual interference issues.</abstract>
<identifier type="citekey">michail-etal-2023-uzh</identifier>
<identifier type="doi">10.18653/v1/2023.semeval-1.140</identifier>
<location>
<url>https://aclanthology.org/2023.semeval-1.140</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>1021</start>
<end>1029</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T UZH_CLyp at SemEval-2023 Task 9: Head-First Fine-Tuning and ChatGPT Data Generation for Cross-Lingual Learning in Tweet Intimacy Prediction
%A Michail, Andrianos
%A Konstantinou, Stefanos
%A Clematide, Simon
%Y Ojha, Atul Kr.
%Y Doğruöz, A. Seza
%Y Da San Martino, Giovanni
%Y Tayyar Madabushi, Harish
%Y Kumar, Ritesh
%Y Sartori, Elisa
%S Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F michail-etal-2023-uzh
%X This paper describes the submission of UZH_CLyp for the SemEval 2023 Task 9 “Multilingual Tweet Intimacy Analysis. We achieved second-best results in all 10 languages according to the official Pearson’s correlation regression evaluation measure. Our cross-lingual transfer learning approach explores the benefits of using a Head-First Fine-Tuning method (HeFiT) that first updates only the regression head parameters and then also updates the pre-trained transformer encoder parameters at a reduced learning rate. Additionally, we study the impact of using a small set of automatically generated examples (in our case, from ChatGPT) for low-resource settings where no human-labeled data is available. Our study shows that HeFiT stabilizes training and consistently improves results for pre-trained models that lack domain adaptation to tweets. Our study also shows a noticeable performance increase in cross-lingual learning when synthetic data is used, confirming the usefulness of current text generation systems to improve zeroshot baseline results. Finally, we examine how possible inconsistencies in the annotated data contribute to cross-lingual interference issues.
%R 10.18653/v1/2023.semeval-1.140
%U https://aclanthology.org/2023.semeval-1.140
%U https://doi.org/10.18653/v1/2023.semeval-1.140
%P 1021-1029
Markdown (Informal)
[UZH_CLyp at SemEval-2023 Task 9: Head-First Fine-Tuning and ChatGPT Data Generation for Cross-Lingual Learning in Tweet Intimacy Prediction](https://aclanthology.org/2023.semeval-1.140) (Michail et al., SemEval 2023)
ACL