% Lejeune and Cartier (2017), workshop paper W17-4103 in the ACL Anthology.
@inproceedings{lejeune-cartier-2017-character,
  author    = {Lejeune, Ga{\"e}l and Cartier, Emmanuel},
  title     = {Character Based Pattern Mining for Neology Detection},
  editor    = {Faruqui, Manaal and Schuetze, Hinrich and Trancoso, Isabel and Yaghoobzadeh, Yadollah},
  booktitle = {Proceedings of the First Workshop on Subword and Character Level Models in {NLP}},
  year      = {2017},
  month     = sep,
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {25--30},
  doi       = {10.18653/v1/W17-4103},
  url       = {https://aclanthology.org/W17-4103},
  abstract  = {Detecting neologisms is essential in real-time natural language processing applications. Not only can it enable to follow the lexical evolution of languages, but it is also essential for updating linguistic resources and parsers. In this paper, neology detection is considered as a classification task where a system has to assess whether a given lexical item is an actual neologism or not. We propose a combination of an unsupervised data mining technique and a supervised machine learning approach. It is inspired by current researches in stylometry and on token-level and character-level patterns. We train and evaluate our system on a manually designed reference dataset in French and Russian. We show that this approach is able to largely outperform state-of-the-art neology detection systems. Furthermore, character-level patterns exhibit good properties for multilingual extensions of the system.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for the paper with citekey lejeune-cartier-2017-character. -->
  <mods ID="lejeune-cartier-2017-character">
    <titleInfo>
      <title>Character Based Pattern Mining for Neology Detection</title>
    </titleInfo>
    <!-- Authors of the paper, in byline order. -->
    <name type="personal">
      <namePart type="given">Gaël</namePart>
      <namePart type="family">Lejeune</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Emmanuel</namePart>
      <namePart type="family">Cartier</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2017-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the First Workshop on Subword and Character Level Models in NLP</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Manaal</namePart>
        <namePart type="family">Faruqui</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hinrich</namePart>
        <namePart type="family">Schuetze</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Isabel</namePart>
        <namePart type="family">Trancoso</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yadollah</namePart>
        <namePart type="family">Yaghoobzadeh</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Copenhagen, Denmark</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Detecting neologisms is essential in real-time natural language processing applications. Not only can it enable to follow the lexical evolution of languages, but it is also essential for updating linguistic resources and parsers. In this paper, neology detection is considered as a classification task where a system has to assess whether a given lexical item is an actual neologism or not. We propose a combination of an unsupervised data mining technique and a supervised machine learning approach. It is inspired by current researches in stylometry and on token-level and character-level patterns. We train and evaluate our system on a manually designed reference dataset in French and Russian. We show that this approach is able to largely outperform state-of-the-art neology detection systems. Furthermore, character-level patterns exhibit good properties for multilingual extensions of the system.</abstract>
    <!-- Identifiers and the landing-page URL for the paper. -->
    <identifier type="citekey">lejeune-cartier-2017-character</identifier>
    <identifier type="doi">10.18653/v1/W17-4103</identifier>
    <location>
      <url>https://aclanthology.org/W17-4103</url>
    </location>
    <!-- Position of the paper within the host volume: pages 25 to 30. -->
    <part>
      <date>2017-09</date>
      <extent unit="page">
        <start>25</start>
        <end>30</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Character Based Pattern Mining for Neology Detection
%A Lejeune, Gaël
%A Cartier, Emmanuel
%Y Faruqui, Manaal
%Y Schuetze, Hinrich
%Y Trancoso, Isabel
%Y Yaghoobzadeh, Yadollah
%S Proceedings of the First Workshop on Subword and Character Level Models in NLP
%D 2017
%8 September
%I Association for Computational Linguistics
%C Copenhagen, Denmark
%F lejeune-cartier-2017-character
%X Detecting neologisms is essential in real-time natural language processing applications. Not only can it enable to follow the lexical evolution of languages, but it is also essential for updating linguistic resources and parsers. In this paper, neology detection is considered as a classification task where a system has to assess whether a given lexical item is an actual neologism or not. We propose a combination of an unsupervised data mining technique and a supervised machine learning approach. It is inspired by current researches in stylometry and on token-level and character-level patterns. We train and evaluate our system on a manually designed reference dataset in French and Russian. We show that this approach is able to largely outperform state-of-the-art neology detection systems. Furthermore, character-level patterns exhibit good properties for multilingual extensions of the system.
%R 10.18653/v1/W17-4103
%U https://aclanthology.org/W17-4103
%U https://doi.org/10.18653/v1/W17-4103
%P 25-30
Markdown (Informal)
[Character Based Pattern Mining for Neology Detection](https://aclanthology.org/W17-4103) (Lejeune & Cartier, SCLeM 2017)
ACL