% Conference paper in the LREC'12 proceedings. Braces in title/booktitle protect case-sensitive words (whole words, not single letters).
@inproceedings{clark-araki-2012-two,
    title     = "Two Database Resources for Processing Social Media {English} Text",
    author    = "Clark, Eleanor and
                 Araki, Kenji",
    editor    = "Calzolari, Nicoletta and
                 Choukri, Khalid and
                 Declerck, Thierry and
                 Do{\u{g}}an, Mehmet U{\u{g}}ur and
                 Maegaard, Bente and
                 Mariani, Joseph and
                 Moreno, Asuncion and
                 Odijk, Jan and
                 Piperidis, Stelios",
    booktitle = "Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12)",
    month     = may,
    year      = "2012",
    address   = "Istanbul, Turkey",
    publisher = "European Language Resources Association (ELRA)",
    url       = "http://www.lrec-conf.org/proceedings/lrec2012/pdf/288_Paper.pdf",
    pages     = "3790--3793",
    abstract  = "This research focuses on text processing in the sphere of English-language social media. We introduce two database resources. The first, CECS (Casual English Conversion System) database, a lexicon-type resource of 1,255 entries, was constructed for use in our experimental system for the automated normalization of casual, irregularly-formed English used in communications such as Twitter. Our rule-based approach primarily aims to avoid problems caused by user creativity and individuality of language when Twitter-style text is used as input in Machine Translation, and to aid comprehension for non-native speakers of English. Although the database is still under development, we have so far carried out two evaluation experiments using our system which have shown positive results. The second database, CEGS (Casual English Generation System) phoneme database contains sets of alternative spellings for the phonemes in the CMU Pronouncing Dictionary, designed for use in a system for generating phoneme-based casual English text from regular English input; in other words, automatically producing humanlike creative sentences as an AI task. This paper provides an overview of the necessity, method, application and evaluation of both resources.",
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, ID "clark-araki-2012-two". -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="clark-araki-2012-two">
    <titleInfo>
      <title>Two Database Resources for Processing Social Media English Text</title>
    </titleInfo>
    <!-- Authors of the paper -->
    <name type="personal">
      <namePart type="given">Eleanor</namePart>
      <namePart type="family">Clark</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kenji</namePart>
      <namePart type="family">Araki</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2012-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Khalid</namePart>
        <namePart type="family">Choukri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Thierry</namePart>
        <namePart type="family">Declerck</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mehmet</namePart>
        <namePart type="given">Uğur</namePart>
        <namePart type="family">Doğan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Bente</namePart>
        <namePart type="family">Maegaard</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joseph</namePart>
        <namePart type="family">Mariani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Asuncion</namePart>
        <namePart type="family">Moreno</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jan</namePart>
        <namePart type="family">Odijk</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stelios</namePart>
        <namePart type="family">Piperidis</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>European Language Resources Association (ELRA)</publisher>
        <place>
          <placeTerm type="text">Istanbul, Turkey</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This research focuses on text processing in the sphere of English-language social media. We introduce two database resources. The first, CECS (Casual English Conversion System) database, a lexicon-type resource of 1,255 entries, was constructed for use in our experimental system for the automated normalization of casual, irregularly-formed English used in communications such as Twitter. Our rule-based approach primarily aims to avoid problems caused by user creativity and individuality of language when Twitter-style text is used as input in Machine Translation, and to aid comprehension for non-native speakers of English. Although the database is still under development, we have so far carried out two evaluation experiments using our system which have shown positive results. The second database, CEGS (Casual English Generation System) phoneme database contains sets of alternative spellings for the phonemes in the CMU Pronouncing Dictionary, designed for use in a system for generating phoneme-based casual English text from regular English input; in other words, automatically producing humanlike creative sentences as an AI task. This paper provides an overview of the necessity, method, application and evaluation of both resources.</abstract>
    <identifier type="citekey">clark-araki-2012-two</identifier>
    <location>
      <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/288_Paper.pdf</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2012-05</date>
      <extent unit="page">
        <start>3790</start>
        <end>3793</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Two Database Resources for Processing Social Media English Text
%A Clark, Eleanor
%A Araki, Kenji
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Doğan, Mehmet Uğur
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)
%D 2012
%8 May
%I European Language Resources Association (ELRA)
%C Istanbul, Turkey
%F clark-araki-2012-two
%X This research focuses on text processing in the sphere of English-language social media. We introduce two database resources. The first, CECS (Casual English Conversion System) database, a lexicon-type resource of 1,255 entries, was constructed for use in our experimental system for the automated normalization of casual, irregularly-formed English used in communications such as Twitter. Our rule-based approach primarily aims to avoid problems caused by user creativity and individuality of language when Twitter-style text is used as input in Machine Translation, and to aid comprehension for non-native speakers of English. Although the database is still under development, we have so far carried out two evaluation experiments using our system which have shown positive results. The second database, CEGS (Casual English Generation System) phoneme database contains sets of alternative spellings for the phonemes in the CMU Pronouncing Dictionary, designed for use in a system for generating phoneme-based casual English text from regular English input; in other words, automatically producing humanlike creative sentences as an AI task. This paper provides an overview of the necessity, method, application and evaluation of both resources.
%U http://www.lrec-conf.org/proceedings/lrec2012/pdf/288_Paper.pdf
%P 3790-3793
Markdown (Informal)
[Two Database Resources for Processing Social Media English Text](http://www.lrec-conf.org/proceedings/lrec2012/pdf/288_Paper.pdf) (Clark & Araki, LREC 2012)
ACL