BibTeX
@inproceedings{guo-etal-2021-bertweetfr,
    title = "{BERT}weet{FR} : Domain Adaptation of Pre-Trained Language Models for {F}rench Tweets",
    author = "Guo, Yanzhu and
      Rennard, Virgile and
      Xypolopoulos, Christos and
      Vazirgiannis, Michalis",
    editor = "Xu, Wei and
      Ritter, Alan and
      Baldwin, Tim and
      Rahimi, Afshin",
    booktitle = "Proceedings of the Seventh Workshop on Noisy User-generated Text (W-NUT 2021)",
    month = nov,
    year = "2021",
    address = "Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.wnut-1.49",
    doi = "10.18653/v1/2021.wnut-1.49",
    pages = "445--450",
    abstract = "We introduce BERTweetFR, the first large-scale pre-trained language model for French tweets. Our model is initialised using a general-domain French language model CamemBERT which follows the base architecture of BERT. Experiments show that BERTweetFR outperforms all previous general-domain French language models on two downstream Twitter NLP tasks of offensiveness identification and named entity recognition. The dataset used in the offensiveness detection task is first created and annotated by our team, filling in the gap of such analytic datasets in French. We make our model publicly available in the transformers library with the aim of promoting future research in analytic tasks for French tweets.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="guo-etal-2021-bertweetfr">
    <titleInfo>
      <title>BERTweetFR : Domain Adaptation of Pre-Trained Language Models for French Tweets</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Yanzhu</namePart>
      <namePart type="family">Guo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Virgile</namePart>
      <namePart type="family">Rennard</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Christos</namePart>
      <namePart type="family">Xypolopoulos</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Michalis</namePart>
      <namePart type="family">Vazirgiannis</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Seventh Workshop on Noisy User-generated Text (W-NUT 2021)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Wei</namePart>
        <namePart type="family">Xu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alan</namePart>
        <namePart type="family">Ritter</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Tim</namePart>
        <namePart type="family">Baldwin</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Afshin</namePart>
        <namePart type="family">Rahimi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We introduce BERTweetFR, the first large-scale pre-trained language model for French tweets. Our model is initialised using a general-domain French language model CamemBERT which follows the base architecture of BERT. Experiments show that BERTweetFR outperforms all previous general-domain French language models on two downstream Twitter NLP tasks of offensiveness identification and named entity recognition. The dataset used in the offensiveness detection task is first created and annotated by our team, filling in the gap of such analytic datasets in French. We make our model publicly available in the transformers library with the aim of promoting future research in analytic tasks for French tweets.</abstract>
    <identifier type="citekey">guo-etal-2021-bertweetfr</identifier>
    <identifier type="doi">10.18653/v1/2021.wnut-1.49</identifier>
    <location>
      <url>https://aclanthology.org/2021.wnut-1.49</url>
    </location>
    <part>
      <date>2021-11</date>
      <extent unit="page">
        <start>445</start>
        <end>450</end>
      </extent>
    </part>
  </mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T BERTweetFR : Domain Adaptation of Pre-Trained Language Models for French Tweets
%A Guo, Yanzhu
%A Rennard, Virgile
%A Xypolopoulos, Christos
%A Vazirgiannis, Michalis
%Y Xu, Wei
%Y Ritter, Alan
%Y Baldwin, Tim
%Y Rahimi, Afshin
%S Proceedings of the Seventh Workshop on Noisy User-generated Text (W-NUT 2021)
%D 2021
%8 November
%I Association for Computational Linguistics
%C Online
%F guo-etal-2021-bertweetfr
%X We introduce BERTweetFR, the first large-scale pre-trained language model for French tweets. Our model is initialised using a general-domain French language model CamemBERT which follows the base architecture of BERT. Experiments show that BERTweetFR outperforms all previous general-domain French language models on two downstream Twitter NLP tasks of offensiveness identification and named entity recognition. The dataset used in the offensiveness detection task is first created and annotated by our team, filling in the gap of such analytic datasets in French. We make our model publicly available in the transformers library with the aim of promoting future research in analytic tasks for French tweets.
%R 10.18653/v1/2021.wnut-1.49
%U https://aclanthology.org/2021.wnut-1.49
%U https://doi.org/10.18653/v1/2021.wnut-1.49
%P 445-450
Markdown (Informal)
[BERTweetFR : Domain Adaptation of Pre-Trained Language Models for French Tweets](https://aclanthology.org/2021.wnut-1.49) (Guo et al., WNUT 2021)
ACL
Yanzhu Guo, Virgile Rennard, Christos Xypolopoulos, and Michalis Vazirgiannis. 2021. BERTweetFR : Domain Adaptation of Pre-Trained Language Models for French Tweets. In Proceedings of the Seventh Workshop on Noisy User-generated Text (W-NUT 2021), pages 445–450, Online. Association for Computational Linguistics.
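
The abstract states that the model is released through the transformers library. As a minimal sketch of how one might load it, assuming the Hugging Face repo name Yanzhu/bertweetfr-base (not stated in this record; verify against the authors' actual release):

# Minimal sketch: loading BERTweetFR with Hugging Face transformers.
# The repo name "Yanzhu/bertweetfr-base" is an assumption, not stated in
# this record; confirm it against the authors' public release.
from transformers import AutoModelForMaskedLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Yanzhu/bertweetfr-base")
model = AutoModelForMaskedLM.from_pretrained("Yanzhu/bertweetfr-base")

# Per the abstract, BERTweetFR is initialised from CamemBERT (a RoBERTa-style
# model), so it loads as a masked LM and can be fine-tuned for downstream
# tweet tasks such as offensiveness identification or NER.
inputs = tokenizer("Ce tweet est écrit en français.", return_tensors="pt")
outputs = model(**inputs)
print(outputs.logits.shape)  # (batch_size, sequence_length, vocab_size)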