@inproceedings{flint-etal-2017-text,
title = "A Text Normalisation System for Non-Standard {E}nglish Words",
author = "Flint, Emma and
Ford, Elliot and
Thomas, Olivia and
Caines, Andrew and
Buttery, Paula",
editor = "Derczynski, Leon and
Xu, Wei and
Ritter, Alan and
Baldwin, Tim",
booktitle = "Proceedings of the 3rd Workshop on Noisy User-generated Text",
month = sep,
year = "2017",
address = "Copenhagen, Denmark",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-4414",
doi = "10.18653/v1/W17-4414",
pages = "107--115",
abstract = "This paper investigates the problem of text normalisation; specifically, the normalisation of non-standard words (NSWs) in English. Non-standard words can be defined as those word tokens which do not have a dictionary entry, and cannot be pronounced using the usual letter-to-phoneme conversion rules; e.g. lbs, 99.3{\%}, {\#}EMNLP2017. NSWs pose a challenge to the proper functioning of text-to-speech technology, and the solution is to spell them out in such a way that they can be pronounced appropriately. We describe our four-stage normalisation system made up of components for detection, classification, division and expansion of NSWs. Performance is favourabe compared to previous work in the field (Sproat et al. 2001, Normalization of non-standard words), as well as state-of-the-art text-to-speech software. Further, we update Sproat et al.{'}s NSW taxonomy, and create a more customisable system where users are able to input their own abbreviations and specify into which variety of English (currently available: British or American) they wish to normalise.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="flint-etal-2017-text">
<titleInfo>
<title>A Text Normalisation System for Non-Standard English Words</title>
</titleInfo>
<name type="personal">
<namePart type="given">Emma</namePart>
<namePart type="family">Flint</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elliot</namePart>
<namePart type="family">Ford</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Olivia</namePart>
<namePart type="family">Thomas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="family">Caines</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paula</namePart>
<namePart type="family">Buttery</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 3rd Workshop on Noisy User-generated Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">Leon</namePart>
<namePart type="family">Derczynski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Ritter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tim</namePart>
<namePart type="family">Baldwin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Copenhagen, Denmark</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper investigates the problem of text normalisation; specifically, the normalisation of non-standard words (NSWs) in English. Non-standard words can be defined as those word tokens which do not have a dictionary entry, and cannot be pronounced using the usual letter-to-phoneme conversion rules; e.g. lbs, 99.3%, #EMNLP2017. NSWs pose a challenge to the proper functioning of text-to-speech technology, and the solution is to spell them out in such a way that they can be pronounced appropriately. We describe our four-stage normalisation system made up of components for detection, classification, division and expansion of NSWs. Performance is favourabe compared to previous work in the field (Sproat et al. 2001, Normalization of non-standard words), as well as state-of-the-art text-to-speech software. Further, we update Sproat et al.’s NSW taxonomy, and create a more customisable system where users are able to input their own abbreviations and specify into which variety of English (currently available: British or American) they wish to normalise.</abstract>
<identifier type="citekey">flint-etal-2017-text</identifier>
<identifier type="doi">10.18653/v1/W17-4414</identifier>
<location>
<url>https://aclanthology.org/W17-4414</url>
</location>
<part>
<date>2017-09</date>
<extent unit="page">
<start>107</start>
<end>115</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Text Normalisation System for Non-Standard English Words
%A Flint, Emma
%A Ford, Elliot
%A Thomas, Olivia
%A Caines, Andrew
%A Buttery, Paula
%Y Derczynski, Leon
%Y Xu, Wei
%Y Ritter, Alan
%Y Baldwin, Tim
%S Proceedings of the 3rd Workshop on Noisy User-generated Text
%D 2017
%8 September
%I Association for Computational Linguistics
%C Copenhagen, Denmark
%F flint-etal-2017-text
%X This paper investigates the problem of text normalisation; specifically, the normalisation of non-standard words (NSWs) in English. Non-standard words can be defined as those word tokens which do not have a dictionary entry, and cannot be pronounced using the usual letter-to-phoneme conversion rules; e.g. lbs, 99.3%, #EMNLP2017. NSWs pose a challenge to the proper functioning of text-to-speech technology, and the solution is to spell them out in such a way that they can be pronounced appropriately. We describe our four-stage normalisation system made up of components for detection, classification, division and expansion of NSWs. Performance is favourabe compared to previous work in the field (Sproat et al. 2001, Normalization of non-standard words), as well as state-of-the-art text-to-speech software. Further, we update Sproat et al.’s NSW taxonomy, and create a more customisable system where users are able to input their own abbreviations and specify into which variety of English (currently available: British or American) they wish to normalise.
%R 10.18653/v1/W17-4414
%U https://aclanthology.org/W17-4414
%U https://doi.org/10.18653/v1/W17-4414
%P 107-115
Markdown (Informal)
[A Text Normalisation System for Non-Standard English Words](https://aclanthology.org/W17-4414) (Flint et al., WNUT 2017)
ACL