@inproceedings{lopez-etal-2014-towards,
title = "Towards Electronic {SMS} Dictionary Construction: An Alignment-based Approach",
author = "Lopez, C{\'e}dric and
Bestandji, Reda and
Roche, Mathieu and
Panckhurst, Rachel",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Loftsson, Hrafn and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)",
month = may,
year = "2014",
address = "Reykjavik, Iceland",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2014/pdf/753_Paper.pdf",
pages = "2833--2838",
abstract = "In this paper, we propose a method for aligning text messages (entitled AlignSMS) in order to automatically build an SMS dictionary. An extract of 100 text messages from the 88milSMS corpus (Panckhurst el al., 2013, 2014) was used as an initial test. More than 90,000 authentic text messages in French were collected from the general public by a group of academics in the south of France in the context of the sud4science project (\url{http://www.sud4science.org}). This project is itself part of a vast international SMS data collection project, entitled sms4science (\url{http://www.sms4science.org}, Fairon et al. 2006, Cougnon, 2014). After corpus collation, pre-processing and anonymisation (Accorsi et al., 2012, Patel et al., 2013), we discuss how raw anonymised text messages can be transcoded into normalised text messages, using a statistical alignment method. The future objective is to set up a hybrid (symbolic/statistic) approach based on both grammar rules and our statistical AlignSMS method.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lopez-etal-2014-towards">
<titleInfo>
<title>Towards Electronic SMS Dictionary Construction: An Alignment-based Approach</title>
</titleInfo>
<name type="personal">
<namePart type="given">Cédric</namePart>
<namePart type="family">Lopez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reda</namePart>
<namePart type="family">Bestandji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mathieu</namePart>
<namePart type="family">Roche</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rachel</namePart>
<namePart type="family">Panckhurst</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2014-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hrafn</namePart>
<namePart type="family">Loftsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Reykjavik, Iceland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we propose a method for aligning text messages (entitled AlignSMS) in order to automatically build an SMS dictionary. An extract of 100 text messages from the 88milSMS corpus (Panckhurst el al., 2013, 2014) was used as an initial test. More than 90,000 authentic text messages in French were collected from the general public by a group of academics in the south of France in the context of the sud4science project (http://www.sud4science.org). This project is itself part of a vast international SMS data collection project, entitled sms4science (http://www.sms4science.org, Fairon et al. 2006, Cougnon, 2014). After corpus collation, pre-processing and anonymisation (Accorsi et al., 2012, Patel et al., 2013), we discuss how raw anonymised text messages can be transcoded into normalised text messages, using a statistical alignment method. The future objective is to set up a hybrid (symbolic/statistic) approach based on both grammar rules and our statistical AlignSMS method.</abstract>
<identifier type="citekey">lopez-etal-2014-towards</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/753_Paper.pdf</url>
</location>
<part>
<date>2014-05</date>
<extent unit="page">
<start>2833</start>
<end>2838</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Electronic SMS Dictionary Construction: An Alignment-based Approach
%A Lopez, Cédric
%A Bestandji, Reda
%A Roche, Mathieu
%A Panckhurst, Rachel
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Loftsson, Hrafn
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)
%D 2014
%8 May
%I European Language Resources Association (ELRA)
%C Reykjavik, Iceland
%F lopez-etal-2014-towards
%X In this paper, we propose a method for aligning text messages (entitled AlignSMS) in order to automatically build an SMS dictionary. An extract of 100 text messages from the 88milSMS corpus (Panckhurst el al., 2013, 2014) was used as an initial test. More than 90,000 authentic text messages in French were collected from the general public by a group of academics in the south of France in the context of the sud4science project (http://www.sud4science.org). This project is itself part of a vast international SMS data collection project, entitled sms4science (http://www.sms4science.org, Fairon et al. 2006, Cougnon, 2014). After corpus collation, pre-processing and anonymisation (Accorsi et al., 2012, Patel et al., 2013), we discuss how raw anonymised text messages can be transcoded into normalised text messages, using a statistical alignment method. The future objective is to set up a hybrid (symbolic/statistic) approach based on both grammar rules and our statistical AlignSMS method.
%U http://www.lrec-conf.org/proceedings/lrec2014/pdf/753_Paper.pdf
%P 2833-2838
Markdown (Informal)
[Towards Electronic SMS Dictionary Construction: An Alignment-based Approach](http://www.lrec-conf.org/proceedings/lrec2014/pdf/753_Paper.pdf) (Lopez et al., LREC 2014)
ACL