@inproceedings{zalmout-etal-2019-unsupervised,
title = "Unsupervised Neologism Normalization Using Embedding Space Mapping",
author = "Zalmout, Nasser and
Thadani, Kapil and
Pappu, Aasish",
editor = "Xu, Wei and
Ritter, Alan and
Baldwin, Tim and
Rahimi, Afshin",
booktitle = "Proceedings of the 5th Workshop on Noisy User-generated Text (W-NUT 2019)",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D19-5555/",
doi = "10.18653/v1/D19-5555",
pages = "425--430",
abstract = "This paper presents an approach for detecting and normalizing neologisms in social media content. Neologisms refer to recent expressions that are specific to certain entities or events and are being increasingly used by the public, but have not yet been accepted in mainstream language. Automated methods for handling neologisms are important for natural language understanding and normalization, especially for informal genres with user generated content. We present an unsupervised approach for detecting neologisms and then normalizing them to canonical words without relying on parallel training data. Our approach builds on the text normalization literature and introduces adaptations to fit the specificities of this task, including phonetic and etymological considerations. We evaluate the proposed techniques on a dataset of Reddit comments, with detected neologisms and corresponding normalizations."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zalmout-etal-2019-unsupervised">
<titleInfo>
<title>Unsupervised Neologism Normalization Using Embedding Space Mapping</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nasser</namePart>
<namePart type="family">Zalmout</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kapil</namePart>
<namePart type="family">Thadani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aasish</namePart>
<namePart type="family">Pappu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 5th Workshop on Noisy User-generated Text (W-NUT 2019)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Ritter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tim</namePart>
<namePart type="family">Baldwin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Afshin</namePart>
<namePart type="family">Rahimi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents an approach for detecting and normalizing neologisms in social media content. Neologisms refer to recent expressions that are specific to certain entities or events and are being increasingly used by the public, but have not yet been accepted in mainstream language. Automated methods for handling neologisms are important for natural language understanding and normalization, especially for informal genres with user generated content. We present an unsupervised approach for detecting neologisms and then normalizing them to canonical words without relying on parallel training data. Our approach builds on the text normalization literature and introduces adaptations to fit the specificities of this task, including phonetic and etymological considerations. We evaluate the proposed techniques on a dataset of Reddit comments, with detected neologisms and corresponding normalizations.</abstract>
<identifier type="citekey">zalmout-etal-2019-unsupervised</identifier>
<identifier type="doi">10.18653/v1/D19-5555</identifier>
<location>
<url>https://aclanthology.org/D19-5555/</url>
</location>
<part>
<date>2019-11</date>
<extent unit="page">
<start>425</start>
<end>430</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Unsupervised Neologism Normalization Using Embedding Space Mapping
%A Zalmout, Nasser
%A Thadani, Kapil
%A Pappu, Aasish
%Y Xu, Wei
%Y Ritter, Alan
%Y Baldwin, Tim
%Y Rahimi, Afshin
%S Proceedings of the 5th Workshop on Noisy User-generated Text (W-NUT 2019)
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong, China
%F zalmout-etal-2019-unsupervised
%X This paper presents an approach for detecting and normalizing neologisms in social media content. Neologisms refer to recent expressions that are specific to certain entities or events and are being increasingly used by the public, but have not yet been accepted in mainstream language. Automated methods for handling neologisms are important for natural language understanding and normalization, especially for informal genres with user generated content. We present an unsupervised approach for detecting neologisms and then normalizing them to canonical words without relying on parallel training data. Our approach builds on the text normalization literature and introduces adaptations to fit the specificities of this task, including phonetic and etymological considerations. We evaluate the proposed techniques on a dataset of Reddit comments, with detected neologisms and corresponding normalizations.
%R 10.18653/v1/D19-5555
%U https://aclanthology.org/D19-5555/
%U https://doi.org/10.18653/v1/D19-5555
%P 425-430
Markdown (Informal)
[Unsupervised Neologism Normalization Using Embedding Space Mapping](https://aclanthology.org/D19-5555/) (Zalmout et al., WNUT 2019)
ACL