% Conference paper in the Proceedings of the 19th Linguistic Annotation
% Workshop (LAW-XIX-2025). Braces inside the title protect words whose
% capitalisation must survive a style's sentence-casing (the FRUIT acronym
% and the proper adjective French). Accented letters use brace-wrapped LaTeX
% escapes so the entry stays ASCII-only.
@inproceedings{bezancon-etal-2025-forbidden,
  title     = {Forbidden {FRUIT} is the Sweetest: An Annotated Tweets Corpus for {French} Unfrozen Idioms Identification},
  author    = {Bezan{\c{c}}on, Julien and
               Lejeune, Ga{\"e}l and
               Gautier, Antoine and
               Hernandez, Marceau and
               Ali{\'e}, F{\'e}lix},
  editor    = {Peng, Siyao and
               Rehbein, Ines},
  booktitle = {Proceedings of the 19th Linguistic Annotation Workshop (LAW-XIX-2025)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.law-1.6/},
  doi       = {10.18653/v1/2025.law-1.6},
  pages     = {70--86},
  isbn      = {979-8-89176-262-6},
  abstract  = {Multiword expressions (MWEs) are a key area of interest in NLP, studied across various languages and inspiring the creation of dedicated datasets and shared tasks such as PARSEME. Puns in multiword expressions (PMWEs) can be described as MWEs that have been ``unfrozen'' to acquire a new meaning or create a wordplay. Unlike MWEs, they have received little attention in NLP, mainly due to the lack of resources available for their study. In this context, we introduce the French Unfrozen Idioms in Tweets (FRUIT) corpus, a dataset of tweets spanning three years and comprising 60,617 tweets containing both MWEs and PMWE candidates. We first describe the process of constructing this corpus, followed by an overview of the manual annotation task performed by three experts on 600 tweets, achieving a maximum {\ensuremath{\alpha}} score of 0.83. Insights from this manual annotation process were then used to develop a Game With A Purpose (GWAP) to annotate more tweets from the FRUIT corpus. This GWAP aims to enhance players' understanding of MWEs and PMWEs. Currently, 13 players made 2,206 annotations on 931 tweets, reaching an {\ensuremath{\alpha}} score of 0.70. In total, 1,531 tweets from the FRUIT corpus have been annotated.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bezancon-etal-2025-forbidden">
<titleInfo>
<title>Forbidden FRUIT is the Sweetest: An Annotated Tweets Corpus for French Unfrozen Idioms Identification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Julien</namePart>
<namePart type="family">Bezançon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gaël</namePart>
<namePart type="family">Lejeune</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antoine</namePart>
<namePart type="family">Gautier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marceau</namePart>
<namePart type="family">Hernandez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Félix</namePart>
<namePart type="family">Alié</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th Linguistic Annotation Workshop (LAW-XIX-2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Siyao</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ines</namePart>
<namePart type="family">Rehbein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-262-6</identifier>
</relatedItem>
<abstract>Multiword expressions (MWEs) are a key area of interest in NLP, studied across various languages and inspiring the creation of dedicated datasets and shared tasks such as PARSEME. Puns in multiword expressions (PMWEs) can be described as MWEs that have been “unfrozen” to acquire a new meaning or create a wordplay. Unlike MWEs, they have received little attention in NLP, mainly due to the lack of resources available for their study. In this context, we introduce the French Unfrozen Idioms in Tweets (FRUIT) corpus, a dataset of tweets spanning three years and comprising 60,617 tweets containing both MWEs and PMWE candidates. We first describe the process of constructing this corpus, followed by an overview of the manual annotation task performed by three experts on 600 tweets, achieving a maximum α score of 0.83. Insights from this manual annotation process were then used to develop a Game With A Purpose (GWAP) to annotate more tweets from the FRUIT corpus. This GWAP aims to enhance players’ understanding of MWEs and PMWEs. Currently, 13 players made 2,206 annotations on 931 tweets, reaching an α score of 0.70. In total, 1,531 tweets from the FRUIT corpus have been annotated.</abstract>
<identifier type="citekey">bezancon-etal-2025-forbidden</identifier>
<identifier type="doi">10.18653/v1/2025.law-1.6</identifier>
<location>
<url>https://aclanthology.org/2025.law-1.6/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>70</start>
<end>86</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Forbidden FRUIT is the Sweetest: An Annotated Tweets Corpus for French Unfrozen Idioms Identification
%A Bezançon, Julien
%A Lejeune, Gaël
%A Gautier, Antoine
%A Hernandez, Marceau
%A Alié, Félix
%Y Peng, Siyao
%Y Rehbein, Ines
%S Proceedings of the 19th Linguistic Annotation Workshop (LAW-XIX-2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-262-6
%F bezancon-etal-2025-forbidden
%X Multiword expressions (MWEs) are a key area of interest in NLP, studied across various languages and inspiring the creation of dedicated datasets and shared tasks such as PARSEME. Puns in multiword expressions (PMWEs) can be described as MWEs that have been “unfrozen” to acquire a new meaning or create a wordplay. Unlike MWEs, they have received little attention in NLP, mainly due to the lack of resources available for their study. In this context, we introduce the French Unfrozen Idioms in Tweets (FRUIT) corpus, a dataset of tweets spanning three years and comprising 60,617 tweets containing both MWEs and PMWE candidates. We first describe the process of constructing this corpus, followed by an overview of the manual annotation task performed by three experts on 600 tweets, achieving a maximum α score of 0.83. Insights from this manual annotation process were then used to develop a Game With A Purpose (GWAP) to annotate more tweets from the FRUIT corpus. This GWAP aims to enhance players’ understanding of MWEs and PMWEs. Currently, 13 players made 2,206 annotations on 931 tweets, reaching an α score of 0.70. In total, 1,531 tweets from the FRUIT corpus have been annotated.
%R 10.18653/v1/2025.law-1.6
%U https://aclanthology.org/2025.law-1.6/
%U https://doi.org/10.18653/v1/2025.law-1.6
%P 70-86
Markdown (Informal)
[Forbidden FRUIT is the Sweetest: An Annotated Tweets Corpus for French Unfrozen Idioms Identification](https://aclanthology.org/2025.law-1.6/) (Bezançon et al., LAW 2025)
ACL