@inproceedings{segonne-etal-2019-using,
title = "Using {W}iktionary as a resource for {WSD} : the case of {F}rench verbs",
author = "Segonne, Vincent and
Candito, Marie and
Crabb\'e, Beno\^\i t",
editor = "Dobnik, Simon and
Chatzikyriakidis, Stergios and
Demberg, Vera",
booktitle = "Proceedings of the 13th International Conference on Computational Semantics - Long Papers",
month = may,
year = "2019",
address = "Gothenburg, Sweden",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-0422/",
doi = "10.18653/v1/W19-0422",
pages = "259--270",
abstract = "As opposed to word sense induction, word sense disambiguation (WSD) has the advantage of us-ing interpretable senses, but requires annotated data, which are quite rare for most languages except English (Miller et al. 1993; Fellbaum, 1998). In this paper, we investigate which strategy to adopt to achieve WSD for languages lacking data that was annotated specifically for the task, focusing on the particular case of verb disambiguation in French. We first study the usability of Eurosense (Bovi et al. 2017) , a multilingual corpus extracted from Europarl (Kohen, 2005) and automatically annotated with BabelNet (Navigli and Ponzetto, 2010) senses. Such a resource opened up the way to supervised and semi-supervised WSD for resourceless languages like French. While this perspective looked promising, our evaluation on French verbs was inconclusive and showed the annotated senses' quality was not sufficient for supervised WSD on French verbs. Instead, we propose to use Wiktionary, a collaboratively edited, multilingual online dictionary, as a resource for WSD. Wiktionary provides both sense inventory and manually sense tagged examples which can be used to train supervised and semi-supervised WSD systems. Yet, because senses' distribution differ in lexicographic examples found in Wiktionary with respect to natural text, we then focus on studying the impact on WSD of the training data size and senses' distribution. Using state-of-the art semi-supervised systems, we report experiments of Wiktionary-based WSD for French verbs, evaluated on FrenchSemEval (FSE), a new dataset of French verbs manually annotated with wiktionary senses."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="segonne-etal-2019-using">
<titleInfo>
<title>Using Wiktionary as a resource for WSD : the case of French verbs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vincent</namePart>
<namePart type="family">Segonne</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie</namePart>
<namePart type="family">Candito</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benoî</namePart>
<namePart type="given">t</namePart>
<namePart type="family">Crabbé</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 13th International Conference on Computational Semantics - Long Papers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Dobnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stergios</namePart>
<namePart type="family">Chatzikyriakidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Demberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gothenburg, Sweden</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>As opposed to word sense induction, word sense disambiguation (WSD) has the advantage of us-ing interpretable senses, but requires annotated data, which are quite rare for most languages except English (Miller et al. 1993; Fellbaum, 1998). In this paper, we investigate which strategy to adopt to achieve WSD for languages lacking data that was annotated specifically for the task, focusing on the particular case of verb disambiguation in French. We first study the usability of Eurosense (Bovi et al. 2017) , a multilingual corpus extracted from Europarl (Kohen, 2005) and automatically annotated with BabelNet (Navigli and Ponzetto, 2010) senses. Such a resource opened up the way to supervised and semi-supervised WSD for resourceless languages like French. While this perspective looked promising, our evaluation on French verbs was inconclusive and showed the annotated senses’ quality was not sufficient for supervised WSD on French verbs. Instead, we propose to use Wiktionary, a collaboratively edited, multilingual online dictionary, as a resource for WSD. Wiktionary provides both sense inventory and manually sense tagged examples which can be used to train supervised and semi-supervised WSD systems. Yet, because senses’ distribution differ in lexicographic examples found in Wiktionary with respect to natural text, we then focus on studying the impact on WSD of the training data size and senses’ distribution. Using state-of-the art semi-supervised systems, we report experiments of Wiktionary-based WSD for French verbs, evaluated on FrenchSemEval (FSE), a new dataset of French verbs manually annotated with wiktionary senses.</abstract>
<identifier type="citekey">segonne-etal-2019-using</identifier>
<identifier type="doi">10.18653/v1/W19-0422</identifier>
<location>
<url>https://aclanthology.org/W19-0422/</url>
</location>
<part>
<date>2019-05</date>
<extent unit="page">
<start>259</start>
<end>270</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Using Wiktionary as a resource for WSD : the case of French verbs
%A Segonne, Vincent
%A Candito, Marie
%A Crabbé, Benoî t.
%Y Dobnik, Simon
%Y Chatzikyriakidis, Stergios
%Y Demberg, Vera
%S Proceedings of the 13th International Conference on Computational Semantics - Long Papers
%D 2019
%8 May
%I Association for Computational Linguistics
%C Gothenburg, Sweden
%F segonne-etal-2019-using
%X As opposed to word sense induction, word sense disambiguation (WSD) has the advantage of us-ing interpretable senses, but requires annotated data, which are quite rare for most languages except English (Miller et al. 1993; Fellbaum, 1998). In this paper, we investigate which strategy to adopt to achieve WSD for languages lacking data that was annotated specifically for the task, focusing on the particular case of verb disambiguation in French. We first study the usability of Eurosense (Bovi et al. 2017) , a multilingual corpus extracted from Europarl (Kohen, 2005) and automatically annotated with BabelNet (Navigli and Ponzetto, 2010) senses. Such a resource opened up the way to supervised and semi-supervised WSD for resourceless languages like French. While this perspective looked promising, our evaluation on French verbs was inconclusive and showed the annotated senses’ quality was not sufficient for supervised WSD on French verbs. Instead, we propose to use Wiktionary, a collaboratively edited, multilingual online dictionary, as a resource for WSD. Wiktionary provides both sense inventory and manually sense tagged examples which can be used to train supervised and semi-supervised WSD systems. Yet, because senses’ distribution differ in lexicographic examples found in Wiktionary with respect to natural text, we then focus on studying the impact on WSD of the training data size and senses’ distribution. Using state-of-the art semi-supervised systems, we report experiments of Wiktionary-based WSD for French verbs, evaluated on FrenchSemEval (FSE), a new dataset of French verbs manually annotated with wiktionary senses.
%R 10.18653/v1/W19-0422
%U https://aclanthology.org/W19-0422/
%U https://doi.org/10.18653/v1/W19-0422
%P 259-270
Markdown (Informal)
[Using Wiktionary as a resource for WSD : the case of French verbs](https://aclanthology.org/W19-0422/) (Segonne et al., IWCS 2019)
ACL