@inproceedings{savary-etal-2019-without,
title = "Without lexicons, multiword expression identification will never fly: A position statement",
author = "Savary, Agata and
Cordeiro, Silvio and
Ramisch, Carlos",
editor = "Savary, Agata and
Escart{\'\i}n, Carla Parra and
Bond, Francis and
Mitrovi{\'c}, Jelena and
Mititelu, Verginica Barbu",
booktitle = "Proceedings of the Joint Workshop on Multiword Expressions and WordNet (MWE-WN 2019)",
month = aug,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-5110",
doi = "10.18653/v1/W19-5110",
pages = "79--91",
abstract = "Because most multiword expressions (MWEs), especially verbal ones, are semantically non-compositional, their automatic identification in running text is a prerequisite for semantically-oriented downstream applications. However, recent developments, driven notably by the PARSEME shared task on automatic identification of verbal MWEs, show that this task is harder than related tasks, despite recent contributions both in multilingual corpus annotation and in computational models. In this paper, we analyse possible reasons for this state of affairs. They lie in the nature of the MWE phenomenon, as well as in its distributional properties. We also offer a comparative analysis of the state-of-the-art systems, which exhibit particularly strong sensitivity to unseen data. On this basis, we claim that, in order to make strong headway in MWE identification, the community should bend its mind into coupling identification of MWEs with their discovery, via syntactic MWE lexicons. Such lexicons need not necessarily achieve a linguistically complete modelling of MWEs{'} behavior, but they should provide minimal morphosyntactic information to cover some potential uses, so as to complement existing MWE-annotated corpora. We define requirements for such minimal NLP-oriented lexicon, and we propose a roadmap for the MWE community driven by these requirements.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="savary-etal-2019-without">
<titleInfo>
<title>Without lexicons, multiword expression identification will never fly: A position statement</title>
</titleInfo>
<name type="personal">
<namePart type="given">Agata</namePart>
<namePart type="family">Savary</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Silvio</namePart>
<namePart type="family">Cordeiro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carlos</namePart>
<namePart type="family">Ramisch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Joint Workshop on Multiword Expressions and WordNet (MWE-WN 2019)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Agata</namePart>
<namePart type="family">Savary</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carla</namePart>
<namePart type="given">Parra</namePart>
<namePart type="family">Escartín</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francis</namePart>
<namePart type="family">Bond</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jelena</namePart>
<namePart type="family">Mitrović</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Verginica</namePart>
<namePart type="given">Barbu</namePart>
<namePart type="family">Mititelu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Because most multiword expressions (MWEs), especially verbal ones, are semantically non-compositional, their automatic identification in running text is a prerequisite for semantically-oriented downstream applications. However, recent developments, driven notably by the PARSEME shared task on automatic identification of verbal MWEs, show that this task is harder than related tasks, despite recent contributions both in multilingual corpus annotation and in computational models. In this paper, we analyse possible reasons for this state of affairs. They lie in the nature of the MWE phenomenon, as well as in its distributional properties. We also offer a comparative analysis of the state-of-the-art systems, which exhibit particularly strong sensitivity to unseen data. On this basis, we claim that, in order to make strong headway in MWE identification, the community should bend its mind into coupling identification of MWEs with their discovery, via syntactic MWE lexicons. Such lexicons need not necessarily achieve a linguistically complete modelling of MWEs’ behavior, but they should provide minimal morphosyntactic information to cover some potential uses, so as to complement existing MWE-annotated corpora. We define requirements for such minimal NLP-oriented lexicon, and we propose a roadmap for the MWE community driven by these requirements.</abstract>
<identifier type="citekey">savary-etal-2019-without</identifier>
<identifier type="doi">10.18653/v1/W19-5110</identifier>
<location>
<url>https://aclanthology.org/W19-5110</url>
</location>
<part>
<date>2019-08</date>
<extent unit="page">
<start>79</start>
<end>91</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Without lexicons, multiword expression identification will never fly: A position statement
%A Savary, Agata
%A Cordeiro, Silvio
%A Ramisch, Carlos
%Y Savary, Agata
%Y Escartín, Carla Parra
%Y Bond, Francis
%Y Mitrović, Jelena
%Y Mititelu, Verginica Barbu
%S Proceedings of the Joint Workshop on Multiword Expressions and WordNet (MWE-WN 2019)
%D 2019
%8 August
%I Association for Computational Linguistics
%C Florence, Italy
%F savary-etal-2019-without
%X Because most multiword expressions (MWEs), especially verbal ones, are semantically non-compositional, their automatic identification in running text is a prerequisite for semantically-oriented downstream applications. However, recent developments, driven notably by the PARSEME shared task on automatic identification of verbal MWEs, show that this task is harder than related tasks, despite recent contributions both in multilingual corpus annotation and in computational models. In this paper, we analyse possible reasons for this state of affairs. They lie in the nature of the MWE phenomenon, as well as in its distributional properties. We also offer a comparative analysis of the state-of-the-art systems, which exhibit particularly strong sensitivity to unseen data. On this basis, we claim that, in order to make strong headway in MWE identification, the community should bend its mind into coupling identification of MWEs with their discovery, via syntactic MWE lexicons. Such lexicons need not necessarily achieve a linguistically complete modelling of MWEs’ behavior, but they should provide minimal morphosyntactic information to cover some potential uses, so as to complement existing MWE-annotated corpora. We define requirements for such minimal NLP-oriented lexicon, and we propose a roadmap for the MWE community driven by these requirements.
%R 10.18653/v1/W19-5110
%U https://aclanthology.org/W19-5110
%U https://doi.org/10.18653/v1/W19-5110
%P 79-91
Markdown (Informal)
[Without lexicons, multiword expression identification will never fly: A position statement](https://aclanthology.org/W19-5110) (Savary et al., MWE 2019)
ACL