<?xml version="1.0" encoding="UTF-16"?>

<!--XML document generated using OCR technology from Nuance Communications, Inc.-->

<document xmlns="http://www.scansoft.com/omnipage/xml/ssdoc-schema3.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4303.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1421" marginTop="1420" marginRight="1392" marginBottom="358" offsetX="-10" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1421" t="1420" r="10517" b="2570">

<column l="1421" t="1420" r="10517" b="2570">

<para l="2323" t="1488" r="9590" b="1762" alignment="centered" spaceBefore="14" spaceAfter="788" lsp="exactly" lspExact="341" language="en">

<ln l="2323" t="1488" r="9590" b="1762" baseLine="1694" bold="true" underlined="none" subsuperscript="none" fontSize="1500" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="2323" t="1493" r="3331" b="1699">Toward</wd>

<space/>

<wd l="3418" t="1493" r="4301" b="1699">Tweets</wd>

<space/>

<wd l="4387" t="1488" r="6226" b="1762">Normalization</wd>

<space/>

<wd l="6312" t="1488" r="7042" b="1762">Using</wd>

<space/>

<wd l="7118" t="1488" r="8443" b="1699">Maximum</wd>

<space/>

<wd l="8525" t="1493" r="9590" b="1762">Entropy</wd>

</ln>

</para>

</column>

</section>

<section l="1421" t="2570" r="10493" b="4008">

<column l="1421" t="2570" r="4301" b="4008">

<para l="1570" t="2626" r="4152" b="2794" alignment="justified" li="144" spaceBefore="3" lsp="exactly" lspExact="272" language="en">

<ln l="1570" t="2626" r="4152" b="2794" baseLine="2784" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1570" t="2626" r="2813" b="2794">Mohammad</wd>

<space/>

<wd l="2875" t="2626" r="3451" b="2794">Arshi</wd>

<space/>

<wd l="3518" t="2626" r="4152" b="2794">Saloot</wd>

</ln>

</para>

<para l="1598" t="2890" r="4147" b="3389" alignment="justified" li="144" fli="72" lsp="exactly" lspExact="273" language="en">

<ln l="1690" t="2890" r="4018" b="3110" baseLine="3058" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1690" t="2894" r="2822" b="3110">Department</wd>

<space/>

<wd l="2885" t="2890" r="3106" b="3062">of</wd>

<space/>

<wd l="3144" t="2890" r="4018" b="3062">Artificial</wd>

<space/>

</ln>

<ln l="1598" t="3168" r="4147" b="3389" baseLine="3331" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1598" t="3168" r="2774" b="3389">Intelligence,</wd>

<space/>

<wd l="2846" t="3168" r="3864" b="3389">University</wd>

<space/>

<wd l="3922" t="3168" r="4147" b="3341">of</wd>

</ln>

</para>

<para l="1637" t="3442" r="4080" b="3662" alignment="justified" li="216" spaceBefore="3" lsp="exactly" lspExact="276" language="en">

<ln l="1637" t="3442" r="4080" b="3662" baseLine="3610" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1637" t="3442" r="2405" b="3662">Malaya,</wd>

<space/>

<wd l="2486" t="3446" r="3125" b="3653">50603,</wd>

<space/>

<wd l="3197" t="3442" r="4080" b="3662">Malaysia</wd>

</ln>

</para>

<para l="1421" t="3773" r="4296" b="4003" alignment="left" spaceBefore="73" lsp="exactly" lspExact="259" language="en">

<ln l="1421" t="3773" r="4296" b="4003" baseLine="3936" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Courier New" fontFamily="modern" fontPitch="fixed" spacing="-6">

<wd l="1421" t="3782" r="4296" b="4003">phd_siamak@yahoo.com</wd>

</ln>

</para>

</column>

<column l="4622" t="2570" r="7171" b="4008">

<para l="4622" t="2626" r="7166" b="3662" alignment="centered" lsp="exactly" lspExact="275" language="en">

<ln l="5170" t="2626" r="6590" b="2842" baseLine="2784" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="5170" t="2626" r="6043" b="2842">Norisma</wd>

<space/>

<wd l="6106" t="2626" r="6590" b="2794">Idris
</wd>

</ln>

<ln l="4714" t="2890" r="7042" b="3110" baseLine="3058" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="4714" t="2894" r="5846" b="3110">Department</wd>

<space/>

<wd l="5909" t="2890" r="6130" b="3062">of</wd>

<space/>

<wd l="6168" t="2890" r="7042" b="3062">Artificial
</wd>

</ln>

<ln l="4622" t="3168" r="7166" b="3389" baseLine="3331" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="4622" t="3168" r="5794" b="3389">Intelligence,</wd>

<space/>

<wd l="5866" t="3168" r="6883" b="3389">University</wd>

<space/>

<wd l="6946" t="3168" r="7166" b="3341">of
</wd>

</ln>

<ln l="4661" t="3442" r="7099" b="3662" baseLine="3610" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="4661" t="3442" r="5424" b="3662">Malaya,</wd>

<space/>

<wd l="5506" t="3446" r="6144" b="3653">50603,</wd>

<space/>

<wd l="6216" t="3442" r="7099" b="3662">Malaysia</wd>

</ln>

</para>

<para l="4666" t="3782" r="7104" b="3984" alignment="left" spaceBefore="73" spaceAfter="19" lsp="exactly" lspExact="240" language="en">

<ln l="4666" t="3782" r="7104" b="3984" baseLine="3936" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Courier New" fontFamily="modern" fontPitch="fixed" spacing="0">

<wd l="4666" t="3782" r="7104" b="3984">norisma@um.edu.my</wd>

</ln>

</para>

</column>

<column l="7483" t="2570" r="10493" b="4008">

<para l="7522" t="2626" r="10430" b="3662" alignment="centered" lsp="exactly" lspExact="275" language="en">

<ln l="8285" t="2626" r="9658" b="2842" baseLine="2784" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="8285" t="2626" r="9005" b="2842">Liyana</wd>

<space/>

<wd l="9077" t="2626" r="9658" b="2794">Shuib
</wd>

</ln>

<ln l="7675" t="2890" r="10272" b="3110" baseLine="3058" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="7675" t="2894" r="8803" b="3110">Department</wd>

<space/>

<wd l="8870" t="2890" r="9091" b="3062">of</wd>

<space/>

<wd l="9134" t="2890" r="10272" b="3062">Information
</wd>

</ln>

<ln l="7522" t="3168" r="10430" b="3389" baseLine="3331" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="7522" t="3173" r="8261" b="3389">System,</wd>

<space/>

<wd l="8333" t="3168" r="9350" b="3389">University</wd>

<space/>

<wd l="9408" t="3168" r="9629" b="3341">of</wd>

<space/>

<wd l="9662" t="3168" r="10430" b="3389">Malaya,
</wd>

</ln>

<ln l="8184" t="3442" r="9778" b="3662" baseLine="3610" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="8184" t="3446" r="8822" b="3653">50603,</wd>

<space/>

<wd l="8894" t="3442" r="9778" b="3662">Malaysia</wd>

</ln>

</para>

<para l="7483" t="3782" r="10488" b="3984" alignment="left" spaceBefore="73" spaceAfter="19" lsp="exactly" lspExact="240" language="en">

<ln l="7483" t="3782" r="10488" b="3984" baseLine="3936" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Courier New" fontFamily="modern" fontPitch="fixed" spacing="-7">

<wd l="7483" t="3782" r="10488" b="3984">liyanashuib@um.edu.my</wd>

</ln>

</para>

</column>

</section>

<section l="1426" t="4322" r="10517" b="15177">

<column l="1426" t="4322" r="5472" b="15177">

<para l="2357" t="4373" r="4901" b="5414" alignment="centered" li="360" lsp="exactly" lspExact="274" language="en">

<ln l="2808" t="4373" r="4402" b="4589" baseLine="4536" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="2808" t="4373" r="3298" b="4541">Ram</wd>

<space/>

<wd l="3370" t="4373" r="3984" b="4589">Gopal</wd>

<space/>

<wd l="4046" t="4373" r="4402" b="4589">Raj
</wd>

</ln>

<ln l="2448" t="4642" r="4771" b="4862" baseLine="4805" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="2448" t="4646" r="3576" b="4862">Department</wd>

<space/>

<wd l="3643" t="4642" r="3864" b="4814">of</wd>

<space/>

<wd l="3898" t="4642" r="4771" b="4814">Artificial
</wd>

</ln>

<ln l="2357" t="4915" r="4901" b="5136" baseLine="5083" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="2357" t="4915" r="3528" b="5136">Intelligence,</wd>

<space/>

<wd l="3600" t="4915" r="4618" b="5136">University</wd>

<space/>

<wd l="4680" t="4915" r="4901" b="5088">of
</wd>

</ln>

<ln l="2395" t="5194" r="4834" b="5414" baseLine="5357" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="2395" t="5194" r="3158" b="5414">Malaya,</wd>

<space/>

<wd l="3240" t="5198" r="3878" b="5405">50603,</wd>

<space/>

<wd l="3950" t="5194" r="4834" b="5414">Malaysia</wd>

</ln>

</para>

<para l="2554" t="5534" r="4694" b="5736" alignment="centered" spaceBefore="73" lsp="exactly" lspExact="245" language="en">

<ln l="2554" t="5534" r="4694" b="5736" baseLine="5683" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Courier New" fontFamily="modern" fontPitch="fixed" spacing="-1">

<wd l="2554" t="5534" r="4694" b="5736">ramdr@um.edu.my</wd>

</ln>

</para>

<para l="1426" t="5846" r="3091" b="6019" alignment="left" spaceBefore="79" lsp="exactly" lspExact="209" language="en">

<ln l="1426" t="5846" r="3091" b="6019" baseLine="5971" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="5846" r="2582" b="6019">*Corresponding</wd>

<space/>

<wd l="2630" t="5846" r="3091" b="5981">author</wd>

</ln>

</para>

<para l="3154" t="6317" r="4046" b="6485" alignment="centered" spaceBefore="245" lsp="exactly" lspExact="277" language="en">

<ln l="3154" t="6317" r="4046" b="6485" baseLine="6480" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="3154" t="6317" r="4046" b="6485">Abstract</wd>

</ln>

</para>

<para l="1752" t="6821" r="5467" b="15115" alignment="justified" li="288" spaceBefore="230" spaceAfter="38" lsp="exactly" lspExact="253" language="en">

<ln l="1762" t="6821" r="5448" b="6979" baseLine="6974" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1762" t="6821" r="2102" b="6979">The</wd>

<space/>

<wd l="2203" t="6869" r="2496" b="6979">use</wd>

<space/>

<wd l="2602" t="6821" r="2803" b="6979">of</wd>

<space/>

<wd l="2890" t="6821" r="3389" b="6979">social</wd>

<space/>

<wd l="3494" t="6821" r="4219" b="6979">network</wd>

<space/>

<wd l="4325" t="6821" r="5021" b="6979">services</wd>

<space/>

<wd l="5131" t="6821" r="5448" b="6979">and</wd>

<space/>

</ln>

<ln l="1757" t="7075" r="5448" b="7277" baseLine="7224" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1757" t="7075" r="2798" b="7277">microblogs,</wd>

<space/>

<wd l="2899" t="7075" r="3293" b="7234">such</wd>

<space/>

<wd l="3384" t="7123" r="3557" b="7234">as</wd>

<space/>

<wd l="3648" t="7075" r="4339" b="7267">Twitter,</wd>

<space/>

<wd l="4430" t="7075" r="4723" b="7234">has</wd>

<space/>

<wd l="4814" t="7075" r="5448" b="7234">created</wd>

<space/>

</ln>

<ln l="1757" t="7325" r="5448" b="7517" baseLine="7478" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1757" t="7325" r="2506" b="7483">valuable</wd>

<space/>

<wd l="2640" t="7344" r="2971" b="7483">text</wd>

<space/>

<wd l="3106" t="7373" r="3984" b="7517">resources,</wd>

<space/>

<wd l="4128" t="7325" r="4666" b="7483">which</wd>

<space/>

<wd l="4805" t="7325" r="5448" b="7483">contain</wd>

<space/>

</ln>

<ln l="1762" t="7579" r="5443" b="7781" baseLine="7733" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1762" t="7579" r="2640" b="7781">extremely</wd>

<space/>

<wd l="2755" t="7579" r="3235" b="7781">noisy</wd>

<space/>

<wd l="3350" t="7598" r="3725" b="7738">text.</wd>

<space/>

<wd l="3854" t="7579" r="4502" b="7738">Twitter</wd>

<space/>

<wd l="4618" t="7627" r="5443" b="7781">messages</wd>

<space/>

</ln>

<ln l="1762" t="7834" r="5443" b="7992" baseLine="7982" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1762" t="7834" r="2410" b="7992">contain</wd>

<space/>

<wd l="2501" t="7882" r="2683" b="7992">so</wd>

<space/>

<wd l="2770" t="7834" r="3259" b="7992">much</wd>

<space/>

<wd l="3341" t="7834" r="3802" b="7992">noise</wd>

<space/>

<wd l="3888" t="7834" r="4219" b="7992">that</wd>

<space/>

<wd l="4306" t="7834" r="4426" b="7992">it</wd>

<space/>

<wd l="4512" t="7834" r="4651" b="7992">is</wd>

<space/>

<wd l="4742" t="7834" r="5443" b="7992">difficult</wd>

<space/>

</ln>

<ln l="1757" t="8088" r="5438" b="8290" baseLine="8237" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1757" t="8107" r="1925" b="8246">to</wd>

<space/>

<wd l="2069" t="8136" r="2362" b="8246">use</wd>

<space/>

<wd l="2501" t="8088" r="2947" b="8246">them</wd>

<space/>

<wd l="3086" t="8088" r="3254" b="8242">in</wd>

<space/>

<wd l="3394" t="8088" r="3998" b="8246">natural</wd>

<space/>

<wd l="4147" t="8088" r="4934" b="8290">language</wd>

<space/>

<wd l="5069" t="8136" r="5438" b="8290">pro-</wd>

</ln>

<ln l="1762" t="8338" r="5443" b="8539" baseLine="8491" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1762" t="8338" r="2410" b="8539">cessing</wd>

<space/>

<wd l="2486" t="8338" r="2971" b="8496">tasks.</wd>

<space/>

<wd l="3062" t="8338" r="3446" b="8496">This</wd>

<space/>

<wd l="3528" t="8386" r="4022" b="8539">paper</wd>

<space/>

<wd l="4094" t="8357" r="4819" b="8539">presents</wd>

<space/>

<wd l="4906" t="8386" r="5002" b="8496">a</wd>

<space/>

<wd l="5078" t="8386" r="5443" b="8496">new</wd>

<space/>

</ln>

<ln l="1762" t="8592" r="5448" b="8794" baseLine="8741" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1762" t="8592" r="2568" b="8794">approach</wd>

<space/>

<wd l="2707" t="8592" r="3187" b="8794">using</wd>

<space/>

<wd l="3326" t="8592" r="3595" b="8750">the</wd>

<space/>

<wd l="3739" t="8592" r="4632" b="8750">maximum</wd>

<space/>

<wd l="4776" t="8611" r="5448" b="8794">entropy</wd>

<space/>

</ln>

<ln l="1757" t="8846" r="5438" b="9048" baseLine="8995" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1757" t="8846" r="2304" b="9005">model</wd>

<space/>

<wd l="2395" t="8846" r="2654" b="9005">for</wd>

<space/>

<wd l="2731" t="8846" r="3797" b="9048">normalizing</wd>

<space/>

<wd l="3883" t="8851" r="4555" b="9005">Tweets.</wd>

<space/>

<wd l="4656" t="8846" r="4992" b="9005">The</wd>

<space/>

<wd l="5074" t="8894" r="5438" b="9048">pro-</wd>

</ln>

<ln l="1752" t="9096" r="5443" b="9298" baseLine="9250" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1752" t="9096" r="2280" b="9298">posed</wd>

<space/>

<wd l="2357" t="9096" r="3158" b="9298">approach</wd>

<space/>

<wd l="3240" t="9096" r="4075" b="9254">addresses</wd>

<space/>

<wd l="4152" t="9096" r="4690" b="9254">words</wd>

<space/>

<wd l="4771" t="9096" r="5102" b="9254">that</wd>

<space/>

<wd l="5184" t="9144" r="5443" b="9254">are</wd>

<space/>

</ln>

<ln l="1757" t="9350" r="5443" b="9552" baseLine="9504" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1757" t="9398" r="2371" b="9509">unseen</wd>

<space/>

<wd l="2486" t="9350" r="2654" b="9504">in</wd>

<space/>

<wd l="2765" t="9350" r="3034" b="9509">the</wd>

<space/>

<wd l="3149" t="9350" r="3835" b="9552">training</wd>

<space/>

<wd l="3941" t="9350" r="4488" b="9552">phase.</wd>

<space/>

<wd l="4613" t="9350" r="5443" b="9552">Although</wd>

<space/>

</ln>

<ln l="1757" t="9605" r="5448" b="9806" baseLine="9754" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1757" t="9605" r="2026" b="9763">the</wd>

<space/>

<wd l="2141" t="9605" r="3034" b="9763">maximum</wd>

<space/>

<wd l="3149" t="9624" r="3821" b="9806">entropy</wd>

<space/>

<wd l="3936" t="9605" r="4430" b="9763">needs</wd>

<space/>

<wd l="4555" t="9653" r="4651" b="9763">a</wd>

<space/>

<wd l="4762" t="9605" r="5448" b="9806">training</wd>

<space/>

</ln>

<ln l="1762" t="9854" r="5438" b="10056" baseLine="10008" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1762" t="9854" r="2371" b="10013">dataset</wd>

<space/>

<wd l="2458" t="9874" r="2626" b="10013">to</wd>

<space/>

<wd l="2717" t="9854" r="3240" b="10056">adjust</wd>

<space/>

<wd l="3326" t="9854" r="3528" b="10013">its</wd>

<space/>

<wd l="3614" t="9874" r="4632" b="10056">parameters,</wd>

<space/>

<wd l="4723" t="9854" r="4992" b="10013">the</wd>

<space/>

<wd l="5074" t="9902" r="5438" b="10056">pro-</wd>

</ln>

<ln l="1752" t="10109" r="5438" b="10310" baseLine="10262" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1752" t="10109" r="2280" b="10310">posed</wd>

<space/>

<wd l="2338" t="10109" r="3139" b="10310">approach</wd>

<space/>

<wd l="3202" t="10157" r="3494" b="10267">can</wd>

<space/>

<wd l="3557" t="10109" r="4435" b="10267">normalize</wd>

<space/>

<wd l="4493" t="10157" r="5107" b="10267">unseen</wd>

<space/>

<wd l="5170" t="10109" r="5438" b="10267">da-</wd>

</ln>

<ln l="1757" t="10363" r="5467" b="10565" baseLine="10512" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1757" t="10382" r="1920" b="10522">ta</wd>

<space/>

<wd l="2035" t="10363" r="2203" b="10517">in</wd>

<space/>

<wd l="2318" t="10363" r="2582" b="10522">the</wd>

<space/>

<wd l="2698" t="10363" r="3389" b="10565">training</wd>

<space/>

<wd l="3509" t="10382" r="3787" b="10522">set.</wd>

<space/>

<wd l="3912" t="10363" r="4248" b="10522">The</wd>

<space/>

<wd l="4358" t="10363" r="5146" b="10565">principle</wd>

<space/>

<wd l="5266" t="10363" r="5467" b="10522">of</wd>

<space/>

</ln>

<ln l="1757" t="10618" r="5438" b="10819" baseLine="10766" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1757" t="10618" r="2654" b="10776">maximum</wd>

<space/>

<wd l="2774" t="10637" r="3446" b="10819">entropy</wd>

<space/>

<wd l="3566" t="10618" r="4574" b="10819">emphasizes</wd>

<space/>

<wd l="4704" t="10618" r="5438" b="10819">incorpo-</wd>

</ln>

<ln l="1757" t="10867" r="5438" b="11069" baseLine="11016" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1757" t="10867" r="2275" b="11069">rating</wd>

<space/>

<wd l="2376" t="10867" r="2645" b="11026">the</wd>

<space/>

<wd l="2750" t="10867" r="3542" b="11026">available</wd>

<space/>

<wd l="3648" t="10867" r="4334" b="11026">features</wd>

<space/>

<wd l="4450" t="10867" r="4781" b="11026">into</wd>

<space/>

<wd l="4896" t="10915" r="4992" b="11026">a</wd>

<space/>

<wd l="5088" t="10867" r="5438" b="11026">uni-</wd>

</ln>

<ln l="1762" t="11122" r="5467" b="11323" baseLine="11270" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1762" t="11122" r="2189" b="11280">form</wd>

<space/>

<wd l="2280" t="11122" r="2875" b="11280">model.</wd>

<space/>

<wd l="2981" t="11122" r="3437" b="11314">First,</wd>

<space/>

<wd l="3538" t="11170" r="3792" b="11280">we</wd>

<space/>

<wd l="3893" t="11141" r="4632" b="11323">generate</wd>

<space/>

<wd l="4733" t="11170" r="4829" b="11280">a</wd>

<space/>

<wd l="4934" t="11141" r="5170" b="11280">set</wd>

<space/>

<wd l="5270" t="11122" r="5467" b="11280">of</wd>

<space/>

</ln>

<ln l="1757" t="11376" r="5438" b="11534" baseLine="11525" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1757" t="11376" r="2755" b="11534">normalized</wd>

<space/>

<wd l="2880" t="11376" r="3797" b="11534">candidates</wd>

<space/>

<wd l="3931" t="11376" r="4186" b="11534">for</wd>

<space/>

<wd l="4310" t="11376" r="4714" b="11534">each</wd>

<space/>

<wd l="4838" t="11376" r="5438" b="11534">out-of-</wd>

</ln>

<ln l="1757" t="11626" r="5438" b="11827" baseLine="11774" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1757" t="11626" r="2736" b="11827">vocabulary</wd>

<space/>

<wd l="2832" t="11626" r="3293" b="11784">word</wd>

<space/>

<wd l="3379" t="11626" r="3893" b="11784">based</wd>

<space/>

<wd l="3989" t="11674" r="4205" b="11784">on</wd>

<space/>

<wd l="4306" t="11626" r="4939" b="11818">lexical,</wd>

<space/>

<wd l="5040" t="11626" r="5438" b="11827">pho-</wd>

</ln>

<ln l="1757" t="11880" r="5434" b="12082" baseLine="12029" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1757" t="11880" r="2342" b="12072">nemic,</wd>

<space/>

<wd l="2419" t="11880" r="2741" b="12038">and</wd>

<space/>

<wd l="2798" t="11880" r="4344" b="12082">morphophonemic</wd>

<space/>

<wd l="4421" t="11880" r="5434" b="12038">similarities.</wd>

<space/>

</ln>

<ln l="1762" t="12134" r="5438" b="12336" baseLine="12283" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1762" t="12134" r="2261" b="12326">Then,</wd>

<space/>

<wd l="2381" t="12134" r="2822" b="12293">three</wd>

<space/>

<wd l="2942" t="12134" r="3696" b="12293">different</wd>

<space/>

<wd l="3806" t="12134" r="4781" b="12336">probability</wd>

<space/>

<wd l="4906" t="12182" r="5438" b="12293">scores</wd>

<space/>

</ln>

<ln l="1762" t="12384" r="5448" b="12586" baseLine="12533" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1762" t="12432" r="2026" b="12542">are</wd>

<space/>

<wd l="2141" t="12384" r="3034" b="12542">calculated</wd>

<space/>

<wd l="3144" t="12384" r="3398" b="12542">for</wd>

<space/>

<wd l="3509" t="12384" r="3907" b="12542">each</wd>

<space/>

<wd l="4022" t="12384" r="4858" b="12542">candidate</wd>

<space/>

<wd l="4968" t="12384" r="5448" b="12586">using</wd>

<space/>

</ln>

<ln l="1752" t="12638" r="5448" b="12840" baseLine="12787" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1752" t="12638" r="2626" b="12840">positional</wd>

<space/>

<wd l="2722" t="12638" r="3533" b="12840">indexing,</wd>

<space/>

<wd l="3638" t="12686" r="3734" b="12797">a</wd>

<space/>

<wd l="3826" t="12638" r="5448" b="12840">dependency-based</wd>

<space/>

</ln>

<ln l="1762" t="12893" r="5434" b="13094" baseLine="13042" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1762" t="12893" r="2640" b="13094">frequency</wd>

<space/>

<wd l="2722" t="12893" r="3326" b="13051">feature</wd>

<space/>

<wd l="3408" t="12893" r="3725" b="13051">and</wd>

<space/>

<wd l="3802" t="12941" r="3898" b="13051">a</wd>

<space/>

<wd l="3974" t="12893" r="4762" b="13094">language</wd>

<space/>

<wd l="4843" t="12893" r="5434" b="13051">model.</wd>

<space/>

</ln>

<ln l="1757" t="13147" r="5438" b="13349" baseLine="13296" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1757" t="13147" r="2227" b="13306">After</wd>

<space/>

<wd l="2285" t="13147" r="2554" b="13306">the</wd>

<space/>

<wd l="2621" t="13147" r="3283" b="13349">optimal</wd>

<space/>

<wd l="3355" t="13147" r="3912" b="13306">values</wd>

<space/>

<wd l="3984" t="13147" r="4186" b="13306">of</wd>

<space/>

<wd l="4224" t="13147" r="4493" b="13306">the</wd>

<space/>

<wd l="4555" t="13147" r="5102" b="13306">model</wd>

<space/>

<wd l="5165" t="13195" r="5438" b="13349">pa-</wd>

</ln>

<ln l="1757" t="13397" r="5438" b="13598" baseLine="13546" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1757" t="13416" r="2515" b="13555">rameters</wd>

<space/>

<wd l="2587" t="13445" r="2851" b="13555">are</wd>

<space/>

<wd l="2923" t="13397" r="3682" b="13555">obtained</wd>

<space/>

<wd l="3744" t="13397" r="3912" b="13550">in</wd>

<space/>

<wd l="3979" t="13445" r="4075" b="13555">a</wd>

<space/>

<wd l="4138" t="13397" r="4824" b="13598">training</wd>

<space/>

<wd l="4882" t="13397" r="5438" b="13598">phase,</wd>

<space/>

</ln>

<ln l="1757" t="13651" r="5438" b="13853" baseLine="13800" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1757" t="13651" r="2026" b="13810">the</wd>

<space/>

<wd l="2117" t="13651" r="2664" b="13810">model</wd>

<space/>

<wd l="2760" t="13699" r="3058" b="13810">can</wd>

<space/>

<wd l="3154" t="13651" r="3931" b="13810">calculate</wd>

<space/>

<wd l="4022" t="13651" r="4286" b="13810">the</wd>

<space/>

<wd l="4382" t="13651" r="4781" b="13810">final</wd>

<space/>

<wd l="4867" t="13651" r="5438" b="13853">proba-</wd>

</ln>

<ln l="1752" t="13901" r="5443" b="14102" baseLine="14054" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1752" t="13901" r="2227" b="14102">bility</wd>

<space/>

<wd l="2294" t="13901" r="2770" b="14059">value</wd>

<space/>

<wd l="2846" t="13901" r="3101" b="14059">for</wd>

<space/>

<wd l="3178" t="13901" r="4142" b="14059">candidates.</wd>

<space/>

<wd l="4229" t="13901" r="4565" b="14059">The</wd>

<space/>

<wd l="4646" t="13901" r="5443" b="14102">approach</wd>

<space/>

</ln>

<ln l="1762" t="14155" r="5448" b="14357" baseLine="14304" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1762" t="14155" r="2544" b="14314">achieved</wd>

<space/>

<wd l="2611" t="14203" r="2818" b="14314">an</wd>

<space/>

<wd l="2894" t="14155" r="3374" b="14314">83.12</wd>

<space/>

<wd l="3446" t="14160" r="4022" b="14314">BLEU</wd>

<space/>

<wd l="4099" t="14203" r="4550" b="14314">score</wd>

<space/>

<wd l="4622" t="14155" r="4790" b="14309">in</wd>

<space/>

<wd l="4858" t="14155" r="5448" b="14357">testing</wd>

<space/>

</ln>

<ln l="1757" t="14410" r="5438" b="14611" baseLine="14558" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1757" t="14410" r="2237" b="14611">using</wd>

<space/>

<wd l="2299" t="14410" r="2784" b="14602">2,000</wd>

<space/>

<wd l="2851" t="14414" r="3528" b="14568">Tweets.</wd>

<space/>

<wd l="3600" t="14410" r="3941" b="14568">Our</wd>

<space/>

<wd l="3998" t="14410" r="5141" b="14611">experimental</wd>

<space/>

<wd l="5203" t="14458" r="5438" b="14568">re-</wd>

</ln>

<ln l="1766" t="14659" r="5438" b="14861" baseLine="14813" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1766" t="14659" r="2160" b="14818">sults</wd>

<space/>

<wd l="2237" t="14659" r="2688" b="14818">show</wd>

<space/>

<wd l="2750" t="14659" r="3082" b="14818">that</wd>

<space/>

<wd l="3144" t="14659" r="3413" b="14818">the</wd>

<space/>

<wd l="3475" t="14659" r="4368" b="14818">maximum</wd>

<space/>

<wd l="4430" t="14678" r="5102" b="14861">entropy</wd>

<space/>

<wd l="5165" t="14707" r="5438" b="14861">ap-</wd>

</ln>

<ln l="1752" t="14914" r="5438" b="15115" baseLine="15067">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="1752" t="14914" r="2357" b="15115">proach</wd>

<space/>

<wd l="2496" t="14914" r="3586" b="15115">significantly</wd>

<space/>

<wd l="3720" t="14914" r="4786" b="15115">outperforms</wd>

<space/>

<wd l="4915" t="14914" r="5438" b="15115">previ-</wd>

</run>

<run fontFace="Times New Roman" fontFamily="roman" fontPitch="variable"><nl orig="true"/>

</run>

</ln>

</para>

</column>

<column l="5765" t="4322" r="10517" b="15177">

<para l="7147" t="4373" r="8131" b="4541" alignment="left" li="1368" spaceBefore="3" lsp="exactly" lspExact="272" language="en">

<ln l="7147" t="4373" r="8131" b="4541" baseLine="4536" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="7147" t="4373" r="7613" b="4536">AiTi</wd>

<space/>

<wd l="7675" t="4373" r="8131" b="4541">Aw*</wd>

</ln>

</para>

<para l="5765" t="4642" r="9514" b="5136" alignment="left" li="864" ri="1008" fli="-864" lsp="exactly" lspExact="273" language="en">

<ln l="5765" t="4642" r="9514" b="4862" baseLine="4805" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="5765" t="4642" r="6542" b="4814">Institute</wd>

<space/>

<wd l="6610" t="4642" r="6888" b="4814">for</wd>

<space/>

<wd l="6955" t="4642" r="7944" b="4814">Infocomm</wd>

<space/>

<wd l="8006" t="4642" r="8885" b="4814">Research</wd>

<space/>

<wd l="8957" t="4642" r="9514" b="4862">(I2R),</wd>

<space/>

</ln>

<ln l="6643" t="4915" r="8635" b="5136" baseLine="5083" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6643" t="4915" r="7598" b="5126">A*STAR,</wd>

<space/>

<wd l="7685" t="4915" r="8635" b="5136">Singapore</wd>

</ln>

</para>

<para l="6000" t="5256" r="9293" b="5458" alignment="left" li="216" spaceBefore="74" lsp="exactly" lspExact="240" language="en">

<ln l="6000" t="5256" r="9293" b="5458" baseLine="5410" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Courier New" fontFamily="modern" fontPitch="fixed" spacing="0">

<wd l="6000" t="5256" r="9293" b="5458">aaiti@i2r.a-star.edu.sg</wd>

</ln>

</para>

<para l="6466" t="6062" r="10142" b="6475" alignment="left" li="576" ri="360" spaceBefore="560" lsp="exactly" lspExact="253" language="en">

<ln l="6466" t="6062" r="10142" b="6264" baseLine="6216" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="2">

<wd l="6466" t="6110" r="6763" b="6221">ous</wd>

<space/>

<wd l="6845" t="6062" r="7891" b="6221">well-known</wd>

<space/>

<wd l="7968" t="6062" r="9192" b="6221">normalization</wd>

<space/>

<wd l="9274" t="6062" r="10142" b="6264">approach-</wd>

</ln>

<ln l="6466" t="6322" r="6691" b="6475" baseLine="6466" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="2">

<wd l="6466" t="6365" r="6691" b="6475">es.</wd>

</ln>

</para>

<para l="6134" t="6821" r="7858" b="6989" alignment="left" li="360" spaceBefore="249" lsp="exactly" lspExact="277" language="en">

<ln l="6134" t="6821" r="7858" b="6989" baseLine="6984" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="16">

<wd l="6134" t="6821" r="6221" b="6984">1</wd>

<space/>

<wd l="6557" t="6821" r="7858" b="6989">Introduction</wd>

</ln>

</para>

<para l="6120" t="7248" r="10493" b="9470" alignment="justified" li="360" spaceBefore="153" lsp="exactly" lspExact="252" language="en">

<ln l="6125" t="7248" r="10478" b="7406" baseLine="7397" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="7248" r="6466" b="7406">The</wd>

<space/>

<wd l="6538" t="7248" r="7118" b="7406">advent</wd>

<space/>

<wd l="7190" t="7248" r="7392" b="7406">of</wd>

<space/>

<wd l="7435" t="7248" r="7848" b="7406">Web</wd>

<space/>

<wd l="7925" t="7248" r="8194" b="7406">2.0</wd>

<space/>

<wd l="8270" t="7248" r="8582" b="7406">and</wd>

<space/>

<wd l="8650" t="7248" r="9509" b="7406">electronic</wd>

<space/>

<wd l="9586" t="7248" r="10478" b="7406">communi-</wd>

</ln>

<ln l="6125" t="7498" r="10493" b="7656" baseLine="7651" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="7498" r="6744" b="7656">cations</wd>

<space/>

<wd l="6854" t="7498" r="7142" b="7656">has</wd>

<space/>

<wd l="7258" t="7498" r="7944" b="7656">enabled</wd>

<space/>

<wd l="8040" t="7498" r="8309" b="7656">the</wd>

<space/>

<wd l="8424" t="7498" r="9245" b="7656">extensive</wd>

<space/>

<wd l="9360" t="7498" r="10066" b="7656">creation</wd>

<space/>

<wd l="10176" t="7498" r="10493" b="7656">and</wd>

<space/>

</ln>

<ln l="6125" t="7752" r="10478" b="7954" baseLine="7906" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="7752" r="7344" b="7910">dissemination</wd>

<space/>

<wd l="7440" t="7752" r="7637" b="7910">of</wd>

<space/>

<wd l="7704" t="7752" r="9005" b="7954">user-generated</wd>

<space/>

<wd l="9091" t="7771" r="9734" b="7910">content</wd>

<space/>

<wd l="9826" t="7752" r="10478" b="7954">(UGC).</wd>

<space/>

</ln>

<ln l="6125" t="8006" r="10493" b="8208" baseLine="8155" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="8006" r="6466" b="8165">The</wd>

<space/>

<wd l="6595" t="8006" r="7056" b="8165">UGC</wd>

<space/>

<wd l="7195" t="8006" r="8141" b="8165">collections</wd>

<space/>

<wd l="8270" t="8006" r="8942" b="8208">provide</wd>

<space/>

<wd l="9077" t="8006" r="9989" b="8165">invaluable</wd>

<space/>

<wd l="10128" t="8006" r="10493" b="8165">data</wd>

<space/>

</ln>

<ln l="6130" t="8261" r="10483" b="8419" baseLine="8410" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="8309" r="6778" b="8419">sources</wd>

<space/>

<wd l="6883" t="8261" r="7051" b="8414">in</wd>

<space/>

<wd l="7152" t="8261" r="7613" b="8419">order</wd>

<space/>

<wd l="7709" t="8280" r="7877" b="8419">to</wd>

<space/>

<wd l="7978" t="8261" r="8414" b="8419">mine</wd>

<space/>

<wd l="8515" t="8261" r="8832" b="8419">and</wd>

<space/>

<wd l="8928" t="8280" r="9523" b="8419">extract</wd>

<space/>

<wd l="9614" t="8261" r="10483" b="8419">beneficial</wd>

<space/>

</ln>

<ln l="6125" t="8510" r="10483" b="8712" baseLine="8664" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="8510" r="7162" b="8669">information</wd>

<space/>

<wd l="7262" t="8510" r="7579" b="8669">and</wd>

<space/>

<wd l="7666" t="8510" r="8678" b="8712">knowledge,</wd>

<space/>

<wd l="8779" t="8510" r="9317" b="8702">while,</wd>

<space/>

<wd l="9422" t="8530" r="9581" b="8669">at</wd>

<space/>

<wd l="9672" t="8510" r="9941" b="8669">the</wd>

<space/>

<wd l="10046" t="8558" r="10483" b="8669">same</wd>

<space/>

</ln>

<ln l="6120" t="8765" r="10483" b="8966" baseLine="8914" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="8765" r="6562" b="8957">time,</wd>

<space/>

<wd l="6715" t="8765" r="7488" b="8966">resulting</wd>

<space/>

<wd l="7642" t="8765" r="7810" b="8918">in</wd>

<space/>

<wd l="7963" t="8765" r="8285" b="8923">less</wd>

<space/>

<wd l="8448" t="8765" r="9552" b="8923">standardized</wd>

<space/>

<wd l="9701" t="8765" r="10483" b="8966">language</wd>

<space/>

</ln>

<ln l="6125" t="9019" r="10483" b="9221" baseLine="9168" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="9019" r="6691" b="9221">(Clark</wd>

<space/>

<wd l="6826" t="9019" r="6989" b="9178">&amp;</wd>

<space/>

<wd l="7128" t="9019" r="7675" b="9211">Araki,</wd>

<space/>

<wd l="7824" t="9019" r="8309" b="9211">2011;</wd>

<space/>

<wd l="8453" t="9019" r="9432" b="9221">Daugherty,</wd>

<space/>

<wd l="9576" t="9019" r="10176" b="9211">Eastin,</wd>

<space/>

<wd l="10320" t="9019" r="10483" b="9178">&amp;</wd>

<space/>

</ln>

<ln l="6120" t="9269" r="7354" b="9470" baseLine="9422" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="9269" r="6734" b="9470">Bright,</wd>

<space/>

<wd l="6802" t="9269" r="7354" b="9470">2008).</wd>

</ln>

</para>

<para l="6120" t="9523" r="10512" b="13354" alignment="justified" li="360" fli="216" lsp="exactly" lspExact="261" language="en">

<ln l="6350" t="9523" r="10498" b="9725" baseLine="9677" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="9528" r="7205" b="9715">However,</wd>

<space/>

<wd l="7282" t="9523" r="7680" b="9682">such</wd>

<space/>

<wd l="7742" t="9542" r="8390" b="9682">content</wd>

<space/>

<wd l="8458" t="9523" r="9192" b="9725">diverges</wd>

<space/>

<wd l="9264" t="9523" r="9686" b="9682">from</wd>

<space/>

<wd l="9754" t="9523" r="10498" b="9682">standard</wd>

<space/>

</ln>

<ln l="6120" t="9778" r="10483" b="9979" baseLine="9926" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="9778" r="6758" b="9979">writing</wd>

<space/>

<wd l="6950" t="9778" r="8054" b="9936">conventions.</wd>

<space/>

<wd l="8251" t="9778" r="8491" b="9936">As</wd>

<space/>

<wd l="8693" t="9778" r="9259" b="9936">shown</wd>

<space/>

<wd l="9442" t="9778" r="9667" b="9979">by</wd>

<space/>

<wd l="9854" t="9797" r="10483" b="9979">experts</wd>

<space/>

</ln>

<ln l="6125" t="10027" r="10483" b="10229" baseLine="10181" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="10027" r="7282" b="10229">(Bieswanger,</wd>

<space/>

<wd l="7406" t="10027" r="7886" b="10219">2007;</wd>

<space/>

<wd l="8011" t="10027" r="8765" b="10186">Thurlow</wd>

<space/>

<wd l="8880" t="10027" r="9043" b="10186">&amp;</wd>

<space/>

<wd l="9158" t="10032" r="9806" b="10219">Brown,</wd>

<space/>

<wd l="9926" t="10027" r="10483" b="10229">2003),</wd>

<space/>

</ln>

<ln l="6120" t="10282" r="10512" b="10483" baseLine="10435" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="10282" r="6437" b="10440">this</wd>

<space/>

<wd l="6504" t="10282" r="7464" b="10483">divergence</wd>

<space/>

<wd l="7531" t="10282" r="7670" b="10440">is</wd>

<space/>

<wd l="7738" t="10282" r="8050" b="10440">due</wd>

<space/>

<wd l="8112" t="10301" r="8280" b="10440">to</wd>

<space/>

<wd l="8342" t="10282" r="8606" b="10440">the</wd>

<space/>

<wd l="8669" t="10330" r="9168" b="10483">usage</wd>

<space/>

<wd l="9240" t="10282" r="9442" b="10440">of</wd>

<space/>

<wd l="9485" t="10330" r="9581" b="10440">a</wd>

<space/>

<wd l="9638" t="10282" r="10248" b="10483">variety</wd>

<space/>

<wd l="10310" t="10282" r="10512" b="10440">of</wd>

<space/>

</ln>

<ln l="6125" t="10536" r="10483" b="10738" baseLine="10685">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6125" t="10536" r="6725" b="10738">coding</wd>

<space/>

<wd l="6802" t="10536" r="7675" b="10738">strategies,</wd>

<space/>

<wd l="7757" t="10536" r="8582" b="10738">including</wd>

<space/>

<wd l="8659" t="10536" r="9058" b="10738">digit</wd>

<space/>

<wd l="9125" t="10536" r="10018" b="10738">phonemes</wd>

<space/>

</run>

<wd l="10099" t="10536" r="10483" b="10738"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">you</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

</ln>

<ln l="6125" t="10771" r="10483" b="11011" baseLine="10958">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6125" t="10838" r="6398" b="10968">too</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="modern" fontPitch="fixed" spacing="0"><wd l="6509" t="10843" r="6715" b="10925">→</wd>

<space/>

</run>

<wd l="6749" t="10810" r="7320" b="11011"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">you2</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">),</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="7392" t="10810" r="8146" b="11011">phonetic</wd>

<space/>

<wd l="8227" t="10810" r="9418" b="11011">transcriptions</wd>

<space/>

</run>

<wd l="9499" t="10810" r="9883" b="11011"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">you</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="modern" fontPitch="fixed" spacing="0"><wd l="9994" t="10843" r="10200" b="10925">→</wd>

<space/>

</run>

<wd l="10258" t="10810" r="10483" b="11011"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">u</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">),</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

</ln>

<ln l="6120" t="11054" r="10483" b="11294" baseLine="11246">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6120" t="11093" r="6653" b="11251">vowel</wd>

<space/>

<wd l="6725" t="11093" r="7205" b="11294">drops</wd>

<space/>

</run>

<wd l="7277" t="11093" r="7925" b="11294"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">dinner</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="modern" fontPitch="fixed" spacing="0"><wd l="8011" t="11126" r="8218" b="11208">→</wd>

<space/>

</run>

<wd l="8261" t="11093" r="8798" b="11294"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">dnnr</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">),</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="8866" t="11093" r="9960" b="11294">misspellings</wd>

<space/>

</run>

<wd l="10027" t="11093" r="10483" b="11294"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">con-</run>

</wd>

</ln>

<ln l="6125" t="11338" r="10483" b="11578" baseLine="11530">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6125" t="11386" r="6893" b="11534">venience</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="modern" fontPitch="fixed" spacing="0"><wd l="7008" t="11410" r="7214" b="11491">→</wd>

<space/>

</run>

<wd l="7282" t="11376" r="8448" b="11578"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">convineince</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">),</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="8539" t="11376" r="8856" b="11534">and</wd>

<space/>

<wd l="8933" t="11376" r="9619" b="11578">missing</wd>

<space/>

<wd l="9706" t="11424" r="9883" b="11534">or</wd>

<space/>

<wd l="9970" t="11376" r="10483" b="11534">incor-</wd>

</run>

</ln>

<ln l="6120" t="11645" r="10483" b="11846" baseLine="11794">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6120" t="11664" r="6451" b="11803">rect</wd>

<space/>

<wd l="6509" t="11645" r="7555" b="11846">punctuation</wd>

<space/>

<wd l="7613" t="11645" r="8146" b="11803">marks</wd>

<space/>

</run>

<wd l="8275" t="11645" r="8525" b="11846"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">If</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="8530" t="11650" r="8630" b="11798">I</wd>

<space/>

<wd l="8678" t="11698" r="9101" b="11803">were</wd>

<space/>

<wd l="9144" t="11698" r="9518" b="11846">you,</wd>

<space/>

<wd l="9590" t="11645" r="9840" b="11803">I&apos;d</wd>

<space/>

<wd l="9864" t="11645" r="10483" b="11846">proba-</wd>

</run>

</ln>

<ln l="6125" t="11880" r="10488" b="12120" baseLine="12067">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6125" t="11918" r="6394" b="12120">bly</wd>

<space/>

<wd l="6461" t="11971" r="6725" b="12120">go.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="modern" fontPitch="fixed" spacing="0"><wd l="6859" t="11952" r="7066" b="12034">→</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7114" t="11918" r="7306" b="12120">If</wd>

<space/>

<wd l="7330" t="11923" r="7430" b="12072">I</wd>

<space/>

<wd l="7493" t="11971" r="7920" b="12077">were</wd>

<space/>

<wd l="7982" t="11971" r="8314" b="12120">you</wd>

<space/>

<wd l="8386" t="11918" r="8587" b="12077">Id</wd>

<space/>

<wd l="8630" t="11918" r="9456" b="12120">probably</wd>

<space/>

</run>

<wd l="9523" t="11918" r="9869" b="12120"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">go</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">).</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="9970" t="11918" r="10488" b="12077">These</wd>

<space/>

</run>

</ln>

<ln l="6125" t="12182" r="10483" b="12384" baseLine="12336" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="12182" r="7037" b="12341">alterations</wd>

<space/>

<wd l="7128" t="12230" r="7392" b="12341">are</wd>

<space/>

<wd l="7483" t="12182" r="7795" b="12341">due</wd>

<space/>

<wd l="7882" t="12202" r="8050" b="12341">to</wd>

<space/>

<wd l="8141" t="12182" r="8578" b="12341">three</wd>

<space/>

<wd l="8664" t="12182" r="9106" b="12341">main</wd>

<space/>

<wd l="9187" t="12202" r="10200" b="12384">parameters:</wd>

<space/>

<wd l="10330" t="12182" r="10483" b="12384">1)</wd>

<space/>

</ln>

<ln l="6125" t="12437" r="10483" b="12638" baseLine="12586" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="12437" r="6466" b="12595">The</wd>

<space/>

<wd l="6571" t="12437" r="7037" b="12595">small</wd>

<space/>

<wd l="7142" t="12437" r="8030" b="12595">allowance</wd>

<space/>

<wd l="8131" t="12437" r="8333" b="12595">of</wd>

<space/>

<wd l="8414" t="12437" r="9350" b="12629">characters,</wd>

<space/>

<wd l="9456" t="12437" r="9629" b="12638">2)</wd>

<space/>

<wd l="9734" t="12437" r="9998" b="12595">the</wd>

<space/>

<wd l="10104" t="12485" r="10483" b="12595">con-</wd>

</ln>

<ln l="6130" t="12686" r="10483" b="12888" baseLine="12840" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="12686" r="6754" b="12845">straints</wd>

<space/>

<wd l="6835" t="12686" r="7037" b="12845">of</wd>

<space/>

<wd l="7085" t="12686" r="7354" b="12845">the</wd>

<space/>

<wd l="7440" t="12686" r="7901" b="12845">small</wd>

<space/>

<wd l="7978" t="12686" r="8746" b="12888">keypads,</wd>

<space/>

<wd l="8832" t="12686" r="9149" b="12845">and</wd>

<space/>

<wd l="9221" t="12686" r="9394" b="12888">3)</wd>

<space/>

<wd l="9475" t="12686" r="9950" b="12888">using</wd>

<space/>

<wd l="10022" t="12686" r="10483" b="12845">UGC</wd>

<space/>

</ln>

<ln l="6125" t="12941" r="10493" b="13099" baseLine="13094" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="12941" r="6293" b="13094">in</wd>

<space/>

<wd l="6365" t="12941" r="7109" b="13099">informal</wd>

<space/>

<wd l="7186" t="12941" r="8630" b="13099">communications</wd>

<space/>

<wd l="8693" t="12941" r="9432" b="13099">between</wd>

<space/>

<wd l="9499" t="12941" r="10104" b="13099">friends</wd>

<space/>

<wd l="10176" t="12941" r="10493" b="13099">and</wd>

<space/>

</ln>

<ln l="6120" t="13195" r="6912" b="13354" baseLine="13344" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="13195" r="6912" b="13354">relatives.</wd>

</ln>

</para>

<para l="6115" t="13450" r="10512" b="15168" alignment="justified" li="360" fli="216" lsp="exactly" lspExact="251" language="en">

<ln l="6350" t="13450" r="10483" b="13642" baseLine="13598" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="13450" r="7210" b="13608">Whatever</wd>

<space/>

<wd l="7306" t="13450" r="7714" b="13608">their</wd>

<space/>

<wd l="7814" t="13498" r="8429" b="13642">causes,</wd>

<space/>

<wd l="8534" t="13450" r="8986" b="13608">these</wd>

<space/>

<wd l="9091" t="13450" r="9998" b="13608">alterations</wd>

<space/>

<wd l="10104" t="13498" r="10483" b="13608">con-</wd>

</ln>

<ln l="6130" t="13699" r="10488" b="13901" baseLine="13853" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="13699" r="6931" b="13901">siderably</wd>

<space/>

<wd l="7051" t="13699" r="7550" b="13858">affect</wd>

<space/>

<wd l="7670" t="13747" r="7987" b="13901">any</wd>

<space/>

<wd l="8112" t="13699" r="8856" b="13858">standard</wd>

<space/>

<wd l="8966" t="13699" r="9576" b="13858">natural</wd>

<space/>

<wd l="9701" t="13699" r="10488" b="13901">language</wd>

<space/>

</ln>

<ln l="6115" t="13954" r="10512" b="14155" baseLine="14102" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6115" t="13954" r="7066" b="14155">processing</wd>

<space/>

<wd l="7138" t="13954" r="7690" b="14155">(NLP)</wd>

<space/>

<wd l="7776" t="13973" r="8419" b="14155">system,</wd>

<space/>

<wd l="8506" t="13954" r="8818" b="14112">due</wd>

<space/>

<wd l="8890" t="13973" r="9058" b="14112">to</wd>

<space/>

<wd l="9130" t="13954" r="9394" b="14112">the</wd>

<space/>

<wd l="9461" t="14002" r="10234" b="14155">presence</wd>

<space/>

<wd l="10310" t="13954" r="10512" b="14112">of</wd>

<space/>

</ln>

<ln l="6120" t="14208" r="10483" b="14410" baseLine="14357" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="14256" r="6610" b="14410">many</wd>

<space/>

<wd l="6758" t="14227" r="7037" b="14366">out</wd>

<space/>

<wd l="7186" t="14208" r="7387" b="14366">of</wd>

<space/>

<wd l="7507" t="14208" r="8486" b="14410">vocabulary</wd>

<space/>

<wd l="8630" t="14208" r="9245" b="14410">(OOV)</wd>

<space/>

<wd l="9398" t="14208" r="9984" b="14400">words,</wd>

<space/>

<wd l="10138" t="14208" r="10483" b="14366">also</wd>

<space/>

</ln>

<ln l="6120" t="14458" r="10483" b="14659" baseLine="14611" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="14458" r="6720" b="14616">known</wd>

<space/>

<wd l="6816" t="14506" r="6994" b="14616">as</wd>

<space/>

<wd l="7090" t="14458" r="8246" b="14616">non-standard</wd>

<space/>

<wd l="8338" t="14458" r="8870" b="14616">words</wd>

<space/>

<wd l="8971" t="14458" r="9682" b="14659">(NSWs)</wd>

<space/>

<wd l="9787" t="14458" r="10104" b="14616">and</wd>

<space/>

<wd l="10195" t="14506" r="10483" b="14616">un-</wd>

</ln>

<ln l="6120" t="14712" r="10488" b="14904" baseLine="14866" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="14712" r="6720" b="14870">known</wd>

<space/>

<wd l="6840" t="14712" r="7421" b="14870">words.</wd>

<space/>

<wd l="7560" t="14712" r="8472" b="14904">Therefore,</wd>

<space/>

<wd l="8602" t="14760" r="8698" b="14870">a</wd>

<space/>

<wd l="8813" t="14731" r="9144" b="14870">text</wd>

<space/>

<wd l="9264" t="14712" r="10488" b="14870">normalization</wd>

<space/>

</ln>

<ln l="6115" t="14966" r="10483" b="15168" baseLine="15115">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6115" t="15014" r="6778" b="15168">process</wd>

<space/>

<wd l="6878" t="14986" r="7306" b="15125">must</wd>

<space/>

<wd l="7392" t="14966" r="7603" b="15125">be</wd>

<space/>

<wd l="7694" t="14966" r="8621" b="15168">performed</wd>

<space/>

<wd l="8707" t="14966" r="9274" b="15125">before</wd>

<space/>

<wd l="9374" t="15014" r="9686" b="15168">any</wd>

<space/>

<wd l="9782" t="15014" r="10483" b="15125">conven-</wd>

</run>

<run fontFace="Times New Roman" fontFamily="roman" fontPitch="variable"><nl orig="true"/>

</run>

</ln>

</para>

</column>

</section>

<section l="1421" t="15177" r="10517" b="16480">

<column l="1421" t="15177" r="10517" b="16480">

<para l="5824" t="15792" r="6143" b="15946" alignment="centered" spaceBefore="567" lsp="exactly" lspExact="249" language="en">

<ln l="5890" t="15792" r="6077" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="32">

<wd l="5890" t="15792" r="6077" b="15946">19</wd>

</ln>

</para>

<para l="2918" t="16133" r="8981" b="16469" alignment="centered" spaceBefore="139" lsp="exactly" lspExact="170" language="en">

<ln l="2918" t="16133" r="8981" b="16301" baseLine="16253" forcedEOF="true">

<run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2918" t="16133" r="3802" b="16296">Proceedings</wd>

<space/>

<wd l="3854" t="16133" r="4018" b="16296">of</wd>

<space/>

<wd l="4037" t="16133" r="4248" b="16262">the</wd>

<space/>

<wd l="4286" t="16138" r="4622" b="16262">ACL</wd>

<space/>

<wd l="4666" t="16133" r="5026" b="16262">2015</wd>

<space/>

<wd l="5078" t="16133" r="5779" b="16296">Workshop</wd>

<space/>

<wd l="5832" t="16176" r="6000" b="16262">on</wd>

<space/>

<wd l="6043" t="16138" r="6456" b="16296">Noisy</wd>

<space/>

<wd l="6518" t="16133" r="7627" b="16296">User-generated</wd>

<space/>

</run>

<wd l="7675" t="16138" r="7992" b="16286"><run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Text</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="8045" t="16171" r="8443" b="16301">pages</wd>

<space/>

<wd l="8515" t="16133" r="8981" b="16286">19–27,</wd>

</run>

</ln>

<ln l="3029" t="16296" r="8870" b="16469" baseLine="16425" forcedEOF="true">

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3029" t="16301" r="3595" b="16469">Beijing,</wd>

<space/>

<wd l="3653" t="16301" r="4114" b="16454">China,</wd>

<space/>

<wd l="4166" t="16301" r="4459" b="16469">July</wd>

<space/>

<wd l="4512" t="16301" r="4723" b="16454">31,</wd>

<space/>

<wd l="4781" t="16301" r="5170" b="16430">2015.</wd>

<space/>

</run>

<wd l="5246" t="16296" r="5770" b="16469"><run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">c</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">&#x20DD;</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">2015</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="5822" t="16301" r="6672" b="16430">Association</wd>

<space/>

<wd l="6715" t="16301" r="6926" b="16430">for</wd>

<space/>

<wd l="6974" t="16301" r="8035" b="16469">Computational</wd>

<space/>

<wd l="8078" t="16301" r="8870" b="16469">Linguistics</wd>

</run>

</ln>

</para>

</column>

</section>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4303.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1409" marginTop="1417" marginRight="1407" marginBottom="1302" offsetX="2" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1409" t="1417" r="10502" b="15398">

<column l="1409" t="1417" r="5796" b="15398">

<para l="1416" t="1464" r="5784" b="2885" alignment="justified" spaceBefore="4" lsp="exactly" lspExact="252" language="en">

<ln l="1416" t="1464" r="5779" b="1666" baseLine="1613" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="1464" r="1915" b="1622">tional</wd>

<space/>

<wd l="1978" t="1469" r="2390" b="1622">NLP</wd>

<space/>

<wd l="2453" t="1512" r="3110" b="1666">process</wd>

<space/>

<wd l="3178" t="1464" r="3317" b="1622">is</wd>

<space/>

<wd l="3384" t="1464" r="4536" b="1666">implemented</wd>

<space/>

<wd l="4594" t="1464" r="5237" b="1666">(Sproat</wd>

<space/>

<wd l="5304" t="1483" r="5458" b="1622">et</wd>

<space/>

<wd l="5520" t="1464" r="5779" b="1656">al.,</wd>

<space/>

</ln>

<ln l="1421" t="1714" r="5784" b="1915" baseLine="1867" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="1714" r="1978" b="1915">2001).</wd>

<space/>

<wd l="2054" t="1714" r="2294" b="1872">As</wd>

<space/>

<wd l="2371" t="1714" r="3029" b="1872">defined</wd>

<space/>

<wd l="3086" t="1714" r="3312" b="1915">by</wd>

<space/>

<wd l="3374" t="1714" r="3730" b="1906">Liu,</wd>

<space/>

<wd l="3802" t="1718" r="4373" b="1915">Weng,</wd>

<space/>

<wd l="4450" t="1718" r="5021" b="1915">Wang,</wd>

<space/>

<wd l="5098" t="1714" r="5419" b="1872">and</wd>

<space/>

<wd l="5482" t="1714" r="5784" b="1872">Liu</wd>

<space/>

</ln>

<ln l="1421" t="1968" r="5784" b="2170" baseLine="2122" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="1968" r="2054" b="2170">(2011),</wd>

<space/>

<wd l="2198" t="1968" r="2698" b="2126">“Text</wd>

<space/>

<wd l="2832" t="2016" r="3571" b="2170">message</wd>

<space/>

<wd l="3710" t="1968" r="4934" b="2126">normalization</wd>

<space/>

<wd l="5074" t="1968" r="5477" b="2126">aims</wd>

<space/>

<wd l="5616" t="1987" r="5784" b="2126">to</wd>

<space/>

</ln>

<ln l="1416" t="2222" r="5779" b="2424" baseLine="2371" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="2222" r="2054" b="2424">replace</wd>

<space/>

<wd l="2117" t="2222" r="2386" b="2381">the</wd>

<space/>

<wd l="2453" t="2222" r="3605" b="2381">non-standard</wd>

<space/>

<wd l="3662" t="2222" r="4229" b="2381">tokens</wd>

<space/>

<wd l="4301" t="2222" r="4632" b="2381">that</wd>

<space/>

<wd l="4699" t="2270" r="5146" b="2424">carry</wd>

<space/>

<wd l="5218" t="2222" r="5779" b="2424">signif-</wd>

</ln>

<ln l="1421" t="2472" r="5784" b="2674" baseLine="2626" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="2472" r="1848" b="2630">icant</wd>

<space/>

<wd l="2026" t="2472" r="2861" b="2674">meanings</wd>

<space/>

<wd l="3043" t="2472" r="3437" b="2630">with</wd>

<space/>

<wd l="3614" t="2472" r="3878" b="2630">the</wd>

<space/>

<wd l="4066" t="2472" r="5784" b="2674">context-appropriate</wd>

<space/>

</ln>

<ln l="1426" t="2726" r="2909" b="2885" baseLine="2880" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="2726" r="2170" b="2885">standard</wd>

<space/>

<wd l="2218" t="2726" r="2909" b="2885">words.”</wd>

</ln>

</para>

<para l="1411" t="2981" r="5789" b="5712" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="1651" t="2981" r="5779" b="3182" baseLine="3130" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="2981" r="2035" b="3139">This</wd>

<space/>

<wd l="2098" t="3029" r="2592" b="3182">paper</wd>

<space/>

<wd l="2650" t="3029" r="3432" b="3182">proposes</wd>

<space/>

<wd l="3509" t="3029" r="3605" b="3139">a</wd>

<space/>

<wd l="3667" t="2981" r="4147" b="3139">novel</wd>

<space/>

<wd l="4219" t="2981" r="5438" b="3139">normalization</wd>

<space/>

<wd l="5510" t="3029" r="5779" b="3182">ap-</wd>

</ln>

<ln l="1411" t="3235" r="5784" b="3437" baseLine="3384" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="3235" r="2016" b="3437">proach</wd>

<space/>

<wd l="2093" t="3235" r="2347" b="3394">for</wd>

<space/>

<wd l="2414" t="3235" r="3062" b="3394">Twitter</wd>

<space/>

<wd l="3130" t="3283" r="4003" b="3437">messages.</wd>

<space/>

<wd l="4085" t="3235" r="4733" b="3394">Twitter</wd>

<space/>

<wd l="4805" t="3235" r="4944" b="3394">is</wd>

<space/>

<wd l="5016" t="3235" r="5285" b="3394">the</wd>

<space/>

<wd l="5357" t="3254" r="5784" b="3394">most</wd>

<space/>

</ln>

<ln l="1411" t="3485" r="5789" b="3686" baseLine="3638" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="3485" r="2093" b="3686">popular</wd>

<space/>

<wd l="2184" t="3485" r="3480" b="3686">microblogging</wd>

<space/>

<wd l="3581" t="3485" r="4195" b="3643">service</wd>

<space/>

<wd l="4296" t="3485" r="4464" b="3638">in</wd>

<space/>

<wd l="4555" t="3485" r="4824" b="3643">the</wd>

<space/>

<wd l="4920" t="3485" r="5438" b="3643">world</wd>

<space/>

<wd l="5530" t="3485" r="5789" b="3643">for</wd>

<space/>

</ln>

<ln l="1416" t="3739" r="5784" b="3941" baseLine="3893" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="3739" r="2616" b="3941">news-casting,</wd>

<space/>

<wd l="2746" t="3739" r="3389" b="3941">sharing</wd>

<space/>

<wd l="3499" t="3739" r="4306" b="3941">thoughts,</wd>

<space/>

<wd l="4435" t="3739" r="4752" b="3898">and</wd>

<space/>

<wd l="4872" t="3739" r="5501" b="3941">staying</wd>

<space/>

<wd l="5616" t="3739" r="5784" b="3893">in</wd>

<space/>

</ln>

<ln l="1416" t="3994" r="5784" b="4195" baseLine="4142" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="3994" r="1910" b="4152">touch</wd>

<space/>

<wd l="2002" t="3994" r="2390" b="4152">with</wd>

<space/>

<wd l="2486" t="3994" r="3139" b="4152">friends.</wd>

<space/>

<wd l="3250" t="3994" r="3730" b="4152">Since</wd>

<space/>

<wd l="3826" t="3994" r="4027" b="4152">its</wd>

<space/>

<wd l="4123" t="3994" r="4632" b="4152">initial</wd>

<space/>

<wd l="4733" t="3994" r="5520" b="4195">founding</wd>

<space/>

<wd l="5616" t="3994" r="5784" b="4147">in</wd>

<space/>

</ln>

<ln l="1421" t="4243" r="5779" b="4445" baseLine="4397" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="4243" r="1906" b="4435">2006,</wd>

<space/>

<wd l="1973" t="4243" r="2093" b="4402">it</wd>

<space/>

<wd l="2150" t="4243" r="2438" b="4402">has</wd>

<space/>

<wd l="2501" t="4243" r="3259" b="4445">gathered</wd>

<space/>

<wd l="3312" t="4243" r="4114" b="4402">hundreds</wd>

<space/>

<wd l="4176" t="4243" r="4378" b="4402">of</wd>

<space/>

<wd l="4411" t="4243" r="5131" b="4402">millions</wd>

<space/>

<wd l="5194" t="4243" r="5395" b="4402">of</wd>

<space/>

<wd l="5429" t="4291" r="5779" b="4445">reg-</wd>

</ln>

<ln l="1421" t="4498" r="5784" b="4699" baseLine="4651" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="4498" r="2011" b="4656">istered</wd>

<space/>

<wd l="2098" t="4546" r="2592" b="4656">users.</wd>

<space/>

<wd l="2698" t="4502" r="3326" b="4656">Tweets</wd>

<space/>

<wd l="3422" t="4498" r="3840" b="4656">refer</wd>

<space/>

<wd l="3926" t="4517" r="4094" b="4656">to</wd>

<space/>

<wd l="4190" t="4546" r="5016" b="4699">messages</wd>

<space/>

<wd l="5122" t="4517" r="5472" b="4656">sent</wd>

<space/>

<wd l="5568" t="4546" r="5784" b="4656">on</wd>

<space/>

</ln>

<ln l="1421" t="4752" r="5779" b="4944" baseLine="4901" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="4752" r="2117" b="4944">Twitter,</wd>

<space/>

<wd l="2194" t="4752" r="2731" b="4910">which</wd>

<space/>

<wd l="2803" t="4752" r="2942" b="4910">is</wd>

<space/>

<wd l="3014" t="4752" r="3840" b="4910">restricted</wd>

<space/>

<wd l="3902" t="4771" r="4070" b="4910">to</wd>

<space/>

<wd l="4171" t="4752" r="4474" b="4910">140</wd>

<space/>

<wd l="4550" t="4752" r="5486" b="4944">characters,</wd>

<space/>

<wd l="5568" t="4752" r="5779" b="4910">20</wd>

<space/>

</ln>

<ln l="1421" t="5006" r="5774" b="5208" baseLine="5155" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="5006" r="2304" b="5165">characters</wd>

<space/>

<wd l="2424" t="5006" r="2746" b="5165">less</wd>

<space/>

<wd l="2856" t="5006" r="3230" b="5165">than</wd>

<space/>

<wd l="3341" t="5006" r="3610" b="5165">the</wd>

<space/>

<wd l="3744" t="5006" r="4042" b="5165">160</wd>

<space/>

<wd l="4162" t="5006" r="4858" b="5165">allowed</wd>

<space/>

<wd l="4958" t="5006" r="5184" b="5208">by</wd>

<space/>

<wd l="5299" t="5006" r="5774" b="5165">SMS.</wd>

<space/>

</ln>

<ln l="1416" t="5256" r="5779" b="5448" baseLine="5410" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="5261" r="2150" b="5414">Because</wd>

<space/>

<wd l="2266" t="5256" r="2467" b="5414">of</wd>

<space/>

<wd l="2554" t="5256" r="2866" b="5414">this</wd>

<space/>

<wd l="2986" t="5256" r="3883" b="5448">limitation,</wd>

<space/>

<wd l="4003" t="5304" r="4450" b="5414">users</wd>

<space/>

<wd l="4565" t="5256" r="4978" b="5414">have</wd>

<space/>

<wd l="5088" t="5275" r="5256" b="5414">to</wd>

<space/>

<wd l="5371" t="5275" r="5779" b="5414">tran-</wd>

</ln>

<ln l="1426" t="5510" r="5597" b="5712" baseLine="5659" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="5510" r="1944" b="5669">scribe</wd>

<space/>

<wd l="2002" t="5515" r="2630" b="5669">Tweets</wd>

<space/>

<wd l="2688" t="5510" r="3082" b="5669">with</wd>

<space/>

<wd l="3139" t="5558" r="3317" b="5669">as</wd>

<space/>

<wd l="3374" t="5510" r="3864" b="5669">much</wd>

<space/>

<wd l="3912" t="5510" r="4541" b="5712">brevity</wd>

<space/>

<wd l="4598" t="5558" r="4776" b="5669">as</wd>

<space/>

<wd l="4829" t="5510" r="5597" b="5712">possible.</wd>

</ln>

</para>

<para l="1416" t="5765" r="5789" b="8813" alignment="justified" spaceBefore="65" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="1651" t="5765" r="5779" b="5923" baseLine="5914" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="5765" r="1987" b="5923">The</wd>

<space/>

<wd l="2136" t="5765" r="3360" b="5923">normalization</wd>

<space/>

<wd l="3504" t="5765" r="3970" b="5923">bears</wd>

<space/>

<wd l="4123" t="5813" r="4219" b="5923">a</wd>

<space/>

<wd l="4368" t="5765" r="5462" b="5923">resemblance</wd>

<space/>

<wd l="5616" t="5784" r="5779" b="5923">to</wd>

<space/>

</ln>

<ln l="1426" t="6014" r="5779" b="6216" baseLine="6163" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="6014" r="2117" b="6216">spelling</wd>

<space/>

<wd l="2179" t="6014" r="3110" b="6173">correction.</wd>

<space/>

<wd l="3182" t="6014" r="3518" b="6173">The</wd>

<space/>

<wd l="3581" t="6014" r="4296" b="6173">ultimate</wd>

<space/>

<wd l="4363" t="6014" r="4733" b="6216">goal</wd>

<space/>

<wd l="4805" t="6014" r="5002" b="6173">of</wd>

<space/>

<wd l="5040" t="6014" r="5578" b="6173">which</wd>

<space/>

<wd l="5640" t="6014" r="5779" b="6173">is</wd>

<space/>

</ln>

<ln l="1416" t="6269" r="5784" b="6427" baseLine="6418" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="6269" r="1685" b="6427">the</wd>

<space/>

<wd l="1762" t="6269" r="2563" b="6427">detection</wd>

<space/>

<wd l="2640" t="6269" r="2957" b="6427">and</wd>

<space/>

<wd l="3024" t="6269" r="3912" b="6427">correction</wd>

<space/>

<wd l="3984" t="6269" r="4186" b="6427">of</wd>

<space/>

<wd l="4238" t="6269" r="4709" b="6427">OOV</wd>

<space/>

<wd l="4781" t="6269" r="5362" b="6427">words.</wd>

<space/>

<wd l="5448" t="6269" r="5784" b="6427">The</wd>

<space/>

</ln>

<ln l="1426" t="6523" r="5779" b="6725" baseLine="6672" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="6523" r="2117" b="6725">spelling</wd>

<space/>

<wd l="2208" t="6523" r="3101" b="6682">correction</wd>

<space/>

<wd l="3192" t="6523" r="3931" b="6682">methods</wd>

<space/>

<wd l="4032" t="6523" r="4421" b="6725">only</wd>

<space/>

<wd l="4517" t="6523" r="4987" b="6682">focus</wd>

<space/>

<wd l="5088" t="6571" r="5304" b="6682">on</wd>

<space/>

<wd l="5395" t="6523" r="5779" b="6682">mis-</wd>

</ln>

<ln l="1426" t="6773" r="5779" b="6974" baseLine="6922" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="6773" r="2050" b="6974">spelled</wd>

<space/>

<wd l="2122" t="6773" r="2659" b="6931">words</wd>

<space/>

<wd l="2741" t="6773" r="3230" b="6931">while</wd>

<space/>

<wd l="3312" t="6773" r="4536" b="6931">normalization</wd>

<space/>

<wd l="4622" t="6792" r="5304" b="6974">systems</wd>

<space/>

<wd l="5395" t="6821" r="5779" b="6931">con-</wd>

</ln>

<ln l="1426" t="7027" r="5779" b="7229" baseLine="7176" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="7027" r="1848" b="7186">sider</wd>

<space/>

<wd l="1910" t="7027" r="2122" b="7186">all</wd>

<space/>

<wd l="2194" t="7027" r="2693" b="7186">forms</wd>

<space/>

<wd l="2765" t="7027" r="2966" b="7186">of</wd>

<space/>

<wd l="3010" t="7027" r="3480" b="7186">OOV</wd>

<space/>

<wd l="3542" t="7027" r="4128" b="7219">words,</wd>

<space/>

<wd l="4205" t="7027" r="4598" b="7186">such</wd>

<space/>

<wd l="4666" t="7075" r="4838" b="7186">as</wd>

<space/>

<wd l="4906" t="7046" r="5779" b="7229">represent-</wd>

</ln>

<ln l="1421" t="7262" r="5779" b="7502" baseLine="7450">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1421" t="7301" r="1699" b="7502">ing</wd>

<space/>

<wd l="1776" t="7301" r="2376" b="7459">sounds</wd>

<space/>

<wd l="2443" t="7301" r="3538" b="7502">phonetically</wd>

<space/>

<wd l="3610" t="7301" r="3979" b="7502">(e.g.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4066" t="7301" r="4272" b="7502">by</wd>

<space/>

<wd l="4344" t="7301" r="4608" b="7459">the</wd>

<space/>

<wd l="4680" t="7354" r="5035" b="7502">way</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="modern" fontPitch="fixed" spacing="0"><wd l="5107" t="7334" r="5314" b="7416">—</wd>

<space/>

</run>

<wd l="5395" t="7301" r="5779" b="7502"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">btw</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">)</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

</ln>

<ln l="1421" t="7546" r="5774" b="7786" baseLine="7733">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1421" t="7584" r="1742" b="7742">and</wd>

<space/>

<wd l="1872" t="7584" r="2722" b="7742">shortened</wd>

<space/>

<wd l="2846" t="7584" r="3350" b="7742">forms</wd>

<space/>

<wd l="3485" t="7584" r="3859" b="7786">(e.g.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4003" t="7594" r="4872" b="7786">university</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="modern" fontPitch="fixed" spacing="0"><wd l="4997" t="7618" r="5203" b="7699">—</wd>

<space/>

</run>

<wd l="5381" t="7584" r="5774" b="7786"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">uni</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">).</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

</ln>

<ln l="1421" t="7853" r="5789" b="8054" baseLine="8002" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="7853" r="1906" b="8045">Thus,</wd>

<space/>

<wd l="1978" t="7853" r="3202" b="8011">normalization</wd>

<space/>

<wd l="3269" t="7853" r="4248" b="8054">approaches</wd>

<space/>

<wd l="4325" t="7853" r="4906" b="8011">should</wd>

<space/>

<wd l="4968" t="7853" r="5621" b="8011">address</wd>

<space/>

<wd l="5693" t="7901" r="5789" b="8011">a</wd>

<space/>

</ln>

<ln l="1416" t="8102" r="5779" b="8304" baseLine="8251" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="8102" r="1982" b="8304">higher</wd>

<space/>

<wd l="2131" t="8102" r="2789" b="8261">volume</wd>

<space/>

<wd l="2947" t="8102" r="3149" b="8261">of</wd>

<space/>

<wd l="3283" t="8102" r="3754" b="8261">OOV</wd>

<space/>

<wd l="3902" t="8102" r="4440" b="8261">words</wd>

<space/>

<wd l="4598" t="8102" r="5467" b="8304">compared</wd>

<space/>

<wd l="5611" t="8122" r="5779" b="8261">to</wd>

<space/>

</ln>

<ln l="1426" t="8357" r="5784" b="8558" baseLine="8506" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="8357" r="2117" b="8558">spelling</wd>

<space/>

<wd l="2198" t="8357" r="3086" b="8515">correction</wd>

<space/>

<wd l="3168" t="8357" r="4147" b="8558">approaches</wd>

<space/>

<wd l="4229" t="8357" r="4560" b="8515">that</wd>

<space/>

<wd l="4642" t="8357" r="5011" b="8515">lead</wd>

<space/>

<wd l="5083" t="8376" r="5251" b="8515">to</wd>

<space/>

<wd l="5333" t="8405" r="5784" b="8515">more</wd>

<space/>

</ln>

<ln l="1421" t="8611" r="2453" b="8813" baseLine="8760" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="8611" r="2453" b="8813">complexity.</wd>

</ln>

</para>

<para l="1411" t="8861" r="5789" b="14587" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="1651" t="8861" r="5784" b="9062" baseLine="9014" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="8866" r="1886" b="9019">To</wd>

<space/>

<wd l="1968" t="8861" r="2621" b="9019">address</wd>

<space/>

<wd l="2693" t="8861" r="3010" b="9019">this</wd>

<space/>

<wd l="3086" t="8861" r="4118" b="9062">complexity,</wd>

<space/>

<wd l="4200" t="8909" r="4454" b="9019">we</wd>

<space/>

<wd l="4526" t="8909" r="4819" b="9019">use</wd>

<space/>

<wd l="4891" t="8861" r="5784" b="9019">maximum</wd>

<space/>

</ln>

<ln l="1421" t="9115" r="5784" b="9317" baseLine="9264" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="9134" r="2093" b="9317">entropy</wd>

<space/>

<wd l="2184" t="9115" r="2899" b="9317">(Berger,</wd>

<space/>

<wd l="2995" t="9115" r="3557" b="9307">Pietra,</wd>

<space/>

<wd l="3658" t="9115" r="3821" b="9274">&amp;</wd>

<space/>

<wd l="3912" t="9115" r="4474" b="9307">Pietra,</wd>

<space/>

<wd l="4594" t="9115" r="5054" b="9307">1996;</wd>

<space/>

<wd l="5165" t="9115" r="5525" b="9274">Och</wd>

<space/>

<wd l="5621" t="9115" r="5784" b="9274">&amp;</wd>

<space/>

</ln>

<ln l="1411" t="9365" r="5779" b="9566" baseLine="9518" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="9370" r="1829" b="9566">Ney,</wd>

<space/>

<wd l="1925" t="9365" r="2429" b="9566">2002)</wd>

<space/>

<wd l="2520" t="9365" r="2774" b="9523">for</wd>

<space/>

<wd l="2851" t="9365" r="3586" b="9566">utilizing</wd>

<space/>

<wd l="3672" t="9365" r="3989" b="9523">and</wd>

<space/>

<wd l="4070" t="9365" r="5251" b="9566">incorporating</wd>

<space/>

<wd l="5333" t="9413" r="5779" b="9523">more</wd>

<space/>

</ln>

<ln l="1411" t="9619" r="5784" b="9821" baseLine="9773" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="9619" r="2386" b="9821">probability</wd>

<space/>

<wd l="2472" t="9619" r="3331" b="9778">functions.</wd>

<space/>

<wd l="3432" t="9619" r="3773" b="9778">Our</wd>

<space/>

<wd l="3859" t="9619" r="4661" b="9821">approach</wd>

<space/>

<wd l="4752" t="9619" r="4886" b="9778">is</wd>

<space/>

<wd l="4973" t="9619" r="5486" b="9778">based</wd>

<space/>

<wd l="5568" t="9667" r="5784" b="9778">on</wd>

<space/>

</ln>

<ln l="1416" t="9874" r="5784" b="10075" baseLine="10022" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="9874" r="1685" b="10032">the</wd>

<space/>

<wd l="1781" t="9874" r="2717" b="10075">hypothesis</wd>

<space/>

<wd l="2808" t="9874" r="3139" b="10032">that</wd>

<space/>

<wd l="3235" t="9874" r="4186" b="10075">integrating</wd>

<space/>

<wd l="4277" t="9922" r="4723" b="10032">more</wd>

<space/>

<wd l="4814" t="9874" r="5784" b="10075">probability</wd>

<space/>

</ln>

<ln l="1421" t="10128" r="5784" b="10330" baseLine="10277" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="10128" r="2232" b="10286">functions</wd>

<space/>

<wd l="2386" t="10128" r="2726" b="10286">will</wd>

<space/>

<wd l="2875" t="10128" r="3355" b="10286">boost</wd>

<space/>

<wd l="3504" t="10128" r="3773" b="10286">the</wd>

<space/>

<wd l="3917" t="10128" r="5030" b="10330">performance</wd>

<space/>

<wd l="5189" t="10128" r="5390" b="10286">of</wd>

<space/>

<wd l="5515" t="10128" r="5784" b="10286">the</wd>

<space/>

</ln>

<ln l="1416" t="10378" r="5789" b="10570" baseLine="10531" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="10378" r="2126" b="10570">method;</wd>

<space/>

<wd l="2222" t="10378" r="3029" b="10570">however,</wd>

<space/>

<wd l="3115" t="10378" r="3384" b="10536">the</wd>

<space/>

<wd l="3475" t="10378" r="4262" b="10536">available</wd>

<space/>

<wd l="4349" t="10378" r="5386" b="10536">information</wd>

<space/>

<wd l="5472" t="10378" r="5789" b="10536">and</wd>

<space/>

</ln>

<ln l="1416" t="10632" r="5760" b="10834" baseLine="10781">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1416" t="10632" r="2093" b="10790">number</wd>

<space/>

<wd l="2165" t="10632" r="2366" b="10790">of</wd>

<space/>

<wd l="2414" t="10632" r="3384" b="10834">probability</wd>

<space/>

<wd l="3461" t="10632" r="4272" b="10790">functions</wd>

<space/>

<wd l="4354" t="10632" r="4608" b="10790">for</wd>

<space/>

</run>

<wd l="4680" t="10632" r="5232" b="10834"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">OOV</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="5275" t="10632" r="5760" b="10814">word,</wd>

<space/>

</run>

</ln>

<ln l="1411" t="10886" r="5779" b="11088" baseLine="11035">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1411" t="10886" r="2213" b="11045">standard</wd>

<space/>

</run>

<wd l="2290" t="10886" r="2808" b="11088"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">word</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">)</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="2899" t="10886" r="3326" b="11088">pairs</wd>

<space/>

<wd l="3427" t="10934" r="3686" b="11045">are</wd>

<space/>

<wd l="3782" t="10886" r="4387" b="11088">always</wd>

<space/>

<wd l="4483" t="10886" r="5146" b="11045">limited.</wd>

<space/>

<wd l="5246" t="10886" r="5779" b="11045">Maxi-</wd>

</run>

</ln>

<ln l="1416" t="11136" r="5789" b="11338" baseLine="11290" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="11184" r="1872" b="11294">mum</wd>

<space/>

<wd l="1968" t="11155" r="2640" b="11338">entropy</wd>

<space/>

<wd l="2736" t="11136" r="3542" b="11338">(Maxent)</wd>

<space/>

<wd l="3638" t="11136" r="4397" b="11338">provides</wd>

<space/>

<wd l="4502" t="11184" r="4598" b="11294">a</wd>

<space/>

<wd l="4694" t="11136" r="5434" b="11294">criterion</wd>

<space/>

<wd l="5534" t="11136" r="5789" b="11294">for</wd>

<space/>

</ln>

<ln l="1421" t="11390" r="5784" b="11592" baseLine="11544" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="11390" r="2371" b="11592">integrating</wd>

<space/>

<wd l="2520" t="11390" r="3494" b="11592">probability</wd>

<space/>

<wd l="3653" t="11390" r="4747" b="11549">distributions</wd>

<space/>

<wd l="4901" t="11390" r="5414" b="11549">based</wd>

<space/>

<wd l="5568" t="11438" r="5784" b="11549">on</wd>

<space/>

</ln>

<ln l="1411" t="11645" r="5784" b="11846" baseLine="11794" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="11645" r="1978" b="11846">partial</wd>

<space/>

<wd l="2122" t="11645" r="3130" b="11846">knowledge.</wd>

<space/>

<wd l="3288" t="11645" r="3624" b="11803">The</wd>

<space/>

<wd l="3763" t="11650" r="4440" b="11803">Maxent</wd>

<space/>

<wd l="4574" t="11645" r="5371" b="11846">produces</wd>

<space/>

<wd l="5515" t="11645" r="5784" b="11803">the</wd>

<space/>

</ln>

<ln l="1421" t="11894" r="5779" b="12096" baseLine="12048" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="11894" r="1992" b="12053">lowest</wd>

<space/>

<wd l="2126" t="11894" r="2698" b="12053">biased</wd>

<space/>

<wd l="2837" t="11894" r="3749" b="12053">estimation</wd>

<space/>

<wd l="3893" t="11942" r="4109" b="12053">on</wd>

<space/>

<wd l="4248" t="11894" r="4512" b="12053">the</wd>

<space/>

<wd l="4661" t="11894" r="5141" b="12096">given</wd>

<space/>

<wd l="5285" t="11894" r="5779" b="12053">infor-</wd>

</ln>

<ln l="1416" t="12149" r="5784" b="12350" baseLine="12302" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="12149" r="2078" b="12341">mation,</wd>

<space/>

<wd l="2155" t="12149" r="2486" b="12307">that</wd>

<space/>

<wd l="2563" t="12149" r="2755" b="12341">is,</wd>

<space/>

<wd l="2837" t="12149" r="2957" b="12307">it</wd>

<space/>

<wd l="3029" t="12149" r="3168" b="12307">is</wd>

<space/>

<wd l="3245" t="12149" r="4190" b="12350">maximally</wd>

<space/>

<wd l="4258" t="12149" r="4867" b="12307">neutral</wd>

<space/>

<wd l="4939" t="12149" r="5784" b="12350">regarding</wd>

<space/>

</ln>

<ln l="1416" t="12403" r="5779" b="12605" baseLine="12552" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="12403" r="2102" b="12605">missing</wd>

<space/>

<wd l="2222" t="12403" r="3302" b="12562">information.</wd>

<space/>

<wd l="3427" t="12403" r="3950" b="12562">When</wd>

<space/>

<wd l="4070" t="12403" r="4800" b="12605">defining</wd>

<space/>

<wd l="4920" t="12451" r="5371" b="12562">some</wd>

<space/>

<wd l="5491" t="12451" r="5779" b="12562">un-</wd>

</ln>

<ln l="1416" t="12658" r="5789" b="12850" baseLine="12806" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="12658" r="2016" b="12816">known</wd>

<space/>

<wd l="2083" t="12677" r="2635" b="12816">events</wd>

<space/>

<wd l="2702" t="12658" r="3096" b="12816">with</wd>

<space/>

<wd l="3163" t="12706" r="3259" b="12816">a</wd>

<space/>

<wd l="3326" t="12658" r="4142" b="12816">statistical</wd>

<space/>

<wd l="4210" t="12658" r="4810" b="12850">model,</wd>

<space/>

<wd l="4877" t="12706" r="5131" b="12816">we</wd>

<space/>

<wd l="5208" t="12658" r="5789" b="12816">should</wd>

<space/>

</ln>

<ln l="1421" t="12907" r="5770" b="13109" baseLine="13061" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="12907" r="2026" b="13109">always</wd>

<space/>

<wd l="2098" t="12907" r="2592" b="13066">select</wd>

<space/>

<wd l="2650" t="12907" r="2918" b="13066">the</wd>

<space/>

<wd l="2986" t="12955" r="3298" b="13066">one</wd>

<space/>

<wd l="3360" t="12907" r="3691" b="13066">that</wd>

<space/>

<wd l="3749" t="12907" r="4037" b="13066">has</wd>

<space/>

<wd l="4104" t="12907" r="4997" b="13066">maximum</wd>

<space/>

<wd l="5059" t="12926" r="5770" b="13109">entropy.</wd>

<space/>

</ln>

<ln l="1416" t="13162" r="5784" b="13363" baseLine="13315" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="13162" r="2251" b="13363">Although</wd>

<space/>

<wd l="2342" t="13162" r="2611" b="13320">the</wd>

<space/>

<wd l="2707" t="13166" r="3379" b="13320">Maxent</wd>

<space/>

<wd l="3475" t="13162" r="3763" b="13320">has</wd>

<space/>

<wd l="3864" t="13162" r="4512" b="13363">already</wd>

<space/>

<wd l="4598" t="13162" r="5021" b="13320">been</wd>

<space/>

<wd l="5117" t="13162" r="5525" b="13320">used</wd>

<space/>

<wd l="5616" t="13162" r="5784" b="13315">in</wd>

<space/>

</ln>

<ln l="1416" t="13416" r="5784" b="13618" baseLine="13565" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="13416" r="1685" b="13574">the</wd>

<space/>

<wd l="1790" t="13416" r="3014" b="13574">normalization</wd>

<space/>

<wd l="3130" t="13416" r="3691" b="13618">sphere</wd>

<space/>

<wd l="3802" t="13416" r="4171" b="13618">(e.g.</wd>

<space/>

<wd l="4291" t="13416" r="4949" b="13574">Pennell</wd>

<space/>

<wd l="5059" t="13416" r="5381" b="13574">and</wd>

<space/>

<wd l="5477" t="13416" r="5784" b="13574">Liu</wd>

<space/>

</ln>

<ln l="1421" t="13666" r="5789" b="13867" baseLine="13819" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="13666" r="1997" b="13867">(2010)</wd>

<space/>

<wd l="2069" t="13666" r="2736" b="13824">utilized</wd>

<space/>

<wd l="2794" t="13670" r="3466" b="13824">Maxent</wd>

<space/>

<wd l="3528" t="13685" r="3696" b="13824">to</wd>

<space/>

<wd l="3768" t="13666" r="4440" b="13867">classify</wd>

<space/>

<wd l="4507" t="13666" r="5789" b="13824">deletion-based</wd>

<space/>

</ln>

<ln l="1421" t="13920" r="5779" b="14122" baseLine="14074" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="13920" r="2726" b="14122">abbreviations),</wd>

<space/>

<wd l="2837" t="13920" r="3149" b="14078">this</wd>

<space/>

<wd l="3254" t="13968" r="3749" b="14122">paper</wd>

<space/>

<wd l="3854" t="13920" r="4579" b="14122">explains</wd>

<space/>

<wd l="4685" t="13920" r="5064" b="14078">how</wd>

<space/>

<wd l="5170" t="13939" r="5338" b="14078">to</wd>

<space/>

<wd l="5448" t="13968" r="5779" b="14078">em-</wd>

</ln>

<ln l="1411" t="14174" r="5789" b="14376" baseLine="14323" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="14174" r="1810" b="14376">ploy</wd>

<space/>

<wd l="1925" t="14179" r="2597" b="14333">Maxent</wd>

<space/>

<wd l="2717" t="14174" r="2971" b="14333">for</wd>

<space/>

<wd l="3096" t="14174" r="3869" b="14376">selecting</wd>

<space/>

<wd l="3979" t="14174" r="4248" b="14333">the</wd>

<space/>

<wd l="4363" t="14174" r="5789" b="14333">best-normalized</wd>

<space/>

</ln>

<ln l="1421" t="14429" r="2304" b="14587" baseLine="14578" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="14429" r="2304" b="14587">candidate.</wd>

</ln>

</para>

<para l="1416" t="14678" r="5779" b="15389" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="1646" t="14678" r="5779" b="14880" baseLine="14832" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="14683" r="1949" b="14837">We</wd>

<space/>

<wd l="2016" t="14678" r="2429" b="14837">have</wd>

<space/>

<wd l="2496" t="14678" r="3403" b="14880">developed</wd>

<space/>

<wd l="3461" t="14726" r="3557" b="14837">a</wd>

<space/>

<wd l="3619" t="14678" r="4282" b="14837">method</wd>

<space/>

<wd l="4339" t="14678" r="4670" b="14837">that</wd>

<space/>

<wd l="4738" t="14678" r="5131" b="14837">does</wd>

<space/>

<wd l="5198" t="14698" r="5482" b="14837">not</wd>

<space/>

<wd l="5539" t="14726" r="5779" b="14837">re-</wd>

</ln>

<ln l="1421" t="14933" r="5779" b="15134" baseLine="15082" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="14933" r="1867" b="15134">quire</wd>

<space/>

<wd l="1978" t="14933" r="2832" b="15091">annotated</wd>

<space/>

<wd l="2928" t="14933" r="3614" b="15134">training</wd>

<space/>

<wd l="3715" t="14933" r="4080" b="15091">data</wd>

<space/>

<wd l="4181" t="14933" r="4502" b="15091">and</wd>

<space/>

<wd l="4598" t="14933" r="4718" b="15091">it</wd>

<space/>

<wd l="4819" t="14933" r="5779" b="15091">normalizes</wd>

<space/>

</ln>

<ln l="1416" t="15187" r="5779" b="15389" baseLine="15336" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="15235" r="2030" b="15346">unseen</wd>

<space/>

<wd l="2189" t="15187" r="2592" b="15346">data.</wd>

<space/>

<wd l="2760" t="15192" r="3211" b="15346">Most</wd>

<space/>

<wd l="3370" t="15187" r="3571" b="15346">of</wd>

<space/>

<wd l="3706" t="15187" r="3970" b="15346">the</wd>

<space/>

<wd l="4123" t="15187" r="5347" b="15346">normalization</wd>

<space/>

<wd l="5506" t="15235" r="5779" b="15389">ap-</wd>

</ln>

<ln l="0" t="0" r="0" b="0" baseLine="0" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<space/>

</ln>

</para>

</column>

<column l="6115" t="1417" r="10502" b="15398">

<para l="6115" t="1459" r="10493" b="4699" alignment="justified" lsp="exactly" lspExact="252" language="en">

<ln l="6115" t="1459" r="10488" b="1661" baseLine="1613" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6115" t="1459" r="6902" b="1661">proaches</wd>

<space/>

<wd l="7018" t="1459" r="8122" b="1661">substantially</wd>

<space/>

<wd l="8232" t="1459" r="8870" b="1661">depend</wd>

<space/>

<wd l="8976" t="1507" r="9192" b="1618">on</wd>

<space/>

<wd l="9298" t="1459" r="9562" b="1618">the</wd>

<space/>

<wd l="9672" t="1459" r="10488" b="1661">manually</wd>

<space/>

</ln>

<ln l="6125" t="1714" r="10488" b="1915" baseLine="1862" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="1714" r="6984" b="1872">annotated</wd>

<space/>

<wd l="7075" t="1714" r="7488" b="1906">data,</wd>

<space/>

<wd l="7589" t="1714" r="8074" b="1872">while</wd>

<space/>

<wd l="8170" t="1714" r="8438" b="1872">the</wd>

<space/>

<wd l="8534" t="1714" r="9173" b="1872">labeled</wd>

<space/>

<wd l="9269" t="1714" r="9629" b="1872">data</wd>

<space/>

<wd l="9725" t="1714" r="9864" b="1872">is</wd>

<space/>

<wd l="9970" t="1714" r="10488" b="1915">costly</wd>

<space/>

</ln>

<ln l="6125" t="1968" r="10488" b="2170" baseLine="2117" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="1968" r="6446" b="2126">and</wd>

<space/>

<wd l="6566" t="1968" r="6955" b="2126">time</wd>

<space/>

<wd l="7090" t="1968" r="8050" b="2170">consuming</wd>

<space/>

<wd l="8174" t="1987" r="8342" b="2126">to</wd>

<space/>

<wd l="8467" t="2016" r="9178" b="2170">prepare.</wd>

<space/>

<wd l="9312" t="1973" r="9614" b="2126">We</wd>

<space/>

<wd l="9749" t="1987" r="10488" b="2170">generate</wd>

<space/>

</ln>

<ln l="6120" t="2218" r="10483" b="2376" baseLine="2371" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="2218" r="7118" b="2376">normalized</wd>

<space/>

<wd l="7234" t="2218" r="8150" b="2376">candidates</wd>

<space/>

<wd l="8275" t="2218" r="8530" b="2376">for</wd>

<space/>

<wd l="8645" t="2218" r="9048" b="2376">each</wd>

<space/>

<wd l="9168" t="2218" r="9902" b="2376">detected</wd>

<space/>

<wd l="10018" t="2218" r="10483" b="2376">OOV</wd>

<space/>

</ln>

<ln l="6115" t="2472" r="10483" b="2674" baseLine="2621" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6115" t="2472" r="6629" b="2630">based</wd>

<space/>

<wd l="6725" t="2520" r="6936" b="2630">on</wd>

<space/>

<wd l="7037" t="2472" r="7666" b="2664">lexical,</wd>

<space/>

<wd l="7762" t="2472" r="8678" b="2674">phonemic,</wd>

<space/>

<wd l="8784" t="2472" r="9106" b="2630">and</wd>

<space/>

<wd l="9192" t="2472" r="10483" b="2674">morphophone-</wd>

</ln>

<ln l="6120" t="2726" r="10483" b="2928" baseLine="2875" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="2726" r="6442" b="2885">mic</wd>

<space/>

<wd l="6542" t="2726" r="7454" b="2885">variations.</wd>

<space/>

<wd l="7560" t="2731" r="7733" b="2880">In</wd>

<space/>

<wd l="7829" t="2726" r="8597" b="2918">addition,</wd>

<space/>

<wd l="8702" t="2726" r="9144" b="2885">since</wd>

<space/>

<wd l="9240" t="2774" r="9533" b="2885">our</wd>

<space/>

<wd l="9619" t="2746" r="10118" b="2928">target</wd>

<space/>

<wd l="10214" t="2726" r="10483" b="2885">da-</wd>

</ln>

<ln l="6120" t="2976" r="10483" b="3178" baseLine="3130" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="2995" r="6528" b="3134">taset</wd>

<space/>

<wd l="6595" t="3024" r="7738" b="3178">encompasses</wd>

<space/>

<wd l="7810" t="2976" r="8458" b="3134">Twitter</wd>

<space/>

<wd l="8520" t="3024" r="9346" b="3178">messages</wd>

<space/>

<wd l="9422" t="2976" r="9850" b="3134">from</wd>

<space/>

<wd l="9922" t="2976" r="10483" b="3178">Singa-</wd>

</ln>

<ln l="6115" t="3230" r="10493" b="3432" baseLine="3384" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6115" t="3278" r="6802" b="3432">poreans</wd>

<space/>

<wd l="6893" t="3230" r="7210" b="3389">and</wd>

<space/>

<wd l="7291" t="3230" r="8630" b="3432">code-switching</wd>

<space/>

<wd l="8702" t="3230" r="9446" b="3389">between</wd>

<space/>

<wd l="9528" t="3230" r="10090" b="3432">Malay</wd>

<space/>

<wd l="10171" t="3230" r="10493" b="3389">and</wd>

<space/>

</ln>

<ln l="6120" t="3485" r="10483" b="3686" baseLine="3634" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="3485" r="6797" b="3686">English</wd>

<space/>

<wd l="6941" t="3485" r="7075" b="3643">is</wd>

<space/>

<wd l="7224" t="3485" r="7954" b="3686">frequent</wd>

<space/>

<wd l="8098" t="3485" r="8266" b="3638">in</wd>

<space/>

<wd l="8400" t="3485" r="8669" b="3643">the</wd>

<space/>

<wd l="8813" t="3485" r="9470" b="3677">dataset,</wd>

<space/>

<wd l="9619" t="3533" r="9715" b="3643">a</wd>

<space/>

<wd l="9850" t="3485" r="10483" b="3686">Malay-</wd>

</ln>

<ln l="6120" t="3739" r="10483" b="3941" baseLine="3888" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="3739" r="6797" b="3941">English</wd>

<space/>

<wd l="6888" t="3739" r="7776" b="3941">dictionary</wd>

<space/>

<wd l="7867" t="3739" r="8006" b="3898">is</wd>

<space/>

<wd l="8098" t="3739" r="8760" b="3898">utilized</wd>

<space/>

<wd l="8846" t="3758" r="9014" b="3898">to</wd>

<space/>

<wd l="9110" t="3758" r="9850" b="3941">generate</wd>

<space/>

<wd l="9941" t="3739" r="10483" b="3898">candi-</wd>

</ln>

<ln l="6125" t="3989" r="10483" b="4190" baseLine="4138" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="3989" r="6571" b="4147">dates</wd>

<space/>

<wd l="6648" t="3989" r="6902" b="4147">for</wd>

<space/>

<wd l="6965" t="3989" r="7531" b="4190">Malay</wd>

<space/>

<wd l="7594" t="3989" r="8179" b="4147">words.</wd>

<space/>

<wd l="8256" t="3989" r="8928" b="4190">Finally,</wd>

<space/>

<wd l="9000" t="3989" r="9893" b="4147">maximum</wd>

<space/>

<wd l="9965" t="4008" r="10483" b="4147">entro-</wd>

</ln>

<ln l="6115" t="4243" r="10483" b="4445" baseLine="4392" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6115" t="4291" r="6341" b="4445">py</wd>

<space/>

<wd l="6413" t="4262" r="7138" b="4445">presents</wd>

<space/>

<wd l="7224" t="4291" r="7320" b="4402">a</wd>

<space/>

<wd l="7387" t="4243" r="8237" b="4402">backbone</wd>

<space/>

<wd l="8314" t="4262" r="8482" b="4402">to</wd>

<space/>

<wd l="8563" t="4243" r="9317" b="4402">combine</wd>

<space/>

<wd l="9403" t="4243" r="10013" b="4402">several</wd>

<space/>

<wd l="10099" t="4291" r="10483" b="4402">con-</wd>

</ln>

<ln l="6125" t="4498" r="10262" b="4699" baseLine="4646" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="4498" r="6792" b="4656">ditional</wd>

<space/>

<wd l="6845" t="4498" r="7944" b="4699">probabilities</wd>

<space/>

<wd l="8011" t="4498" r="8213" b="4656">of</wd>

<space/>

<wd l="8242" t="4498" r="9235" b="4656">normalized</wd>

<space/>

<wd l="9293" t="4498" r="10262" b="4656">candidates.</wd>

</ln>

</para>

<para l="6115" t="4747" r="10488" b="6720" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="6355" t="4747" r="10483" b="4949" baseLine="4896" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="4747" r="6691" b="4906">The</wd>

<space/>

<wd l="6797" t="4747" r="7690" b="4906">remainder</wd>

<space/>

<wd l="7795" t="4747" r="7997" b="4906">of</wd>

<space/>

<wd l="8074" t="4747" r="8390" b="4906">this</wd>

<space/>

<wd l="8491" t="4795" r="8986" b="4949">paper</wd>

<space/>

<wd l="9091" t="4747" r="9230" b="4906">is</wd>

<space/>

<wd l="9341" t="4747" r="10210" b="4949">organized</wd>

<space/>

<wd l="10310" t="4795" r="10483" b="4906">as</wd>

<space/>

</ln>

<ln l="6125" t="5002" r="10488" b="5203" baseLine="5150" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="5002" r="6826" b="5160">follows:</wd>

<space/>

<wd l="6965" t="5002" r="7618" b="5160">Section</wd>

<space/>

<wd l="7733" t="5002" r="7834" b="5155">2</wd>

<space/>

<wd l="7954" t="5002" r="8410" b="5203">gives</wd>

<space/>

<wd l="8530" t="5050" r="8626" b="5160">a</wd>

<space/>

<wd l="8746" t="5050" r="9322" b="5203">survey</wd>

<space/>

<wd l="9437" t="5002" r="9638" b="5160">of</wd>

<space/>

<wd l="9734" t="5002" r="10488" b="5160">different</wd>

<space/>

</ln>

<ln l="6125" t="5256" r="10474" b="5458" baseLine="5405" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="5256" r="7109" b="5458">approaches</wd>

<space/>

<wd l="7200" t="5256" r="7402" b="5414">of</wd>

<space/>

<wd l="7464" t="5256" r="8525" b="5458">normalizing</wd>

<space/>

<wd l="8606" t="5256" r="9086" b="5458">noisy</wd>

<space/>

<wd l="9168" t="5275" r="9542" b="5414">text.</wd>

<space/>

<wd l="9648" t="5256" r="10296" b="5414">Section</wd>

<space/>

<wd l="10382" t="5256" r="10474" b="5414">3</wd>

<space/>

</ln>

<ln l="6125" t="5506" r="10483" b="5707" baseLine="5654" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="5506" r="6936" b="5664">describes</wd>

<space/>

<wd l="7037" t="5506" r="7306" b="5664">the</wd>

<space/>

<wd l="7402" t="5506" r="8626" b="5707">preprocessing</wd>

<space/>

<wd l="8731" t="5525" r="9216" b="5707">stage.</wd>

<space/>

<wd l="9336" t="5506" r="9989" b="5664">Section</wd>

<space/>

<wd l="10085" t="5506" r="10190" b="5659">4</wd>

<space/>

<wd l="10296" t="5506" r="10483" b="5659">il-</wd>

</ln>

<ln l="6125" t="5760" r="10483" b="5962" baseLine="5909" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="5760" r="6854" b="5918">lustrates</wd>

<space/>

<wd l="6917" t="5760" r="7186" b="5918">the</wd>

<space/>

<wd l="7253" t="5760" r="8093" b="5918">candidate</wd>

<space/>

<wd l="8160" t="5760" r="9086" b="5962">generation</wd>

<space/>

<wd l="9158" t="5779" r="9643" b="5962">stage.</wd>

<space/>

<wd l="9720" t="5760" r="10056" b="5918">The</wd>

<space/>

<wd l="10118" t="5808" r="10483" b="5962">pro-</wd>

</ln>

<ln l="6115" t="6014" r="10483" b="6216" baseLine="6163" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6115" t="6014" r="6643" b="6216">posed</wd>

<space/>

<wd l="6715" t="6014" r="7550" b="6173">candidate</wd>

<space/>

<wd l="7637" t="6014" r="8410" b="6173">selection</wd>

<space/>

<wd l="8482" t="6014" r="9144" b="6173">method</wd>

<space/>

<wd l="9221" t="6014" r="9360" b="6173">is</wd>

<space/>

<wd l="9442" t="6014" r="10483" b="6173">demonstrat-</wd>

</ln>

<ln l="6125" t="6264" r="10483" b="6466" baseLine="6418" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="6264" r="6336" b="6422">ed</wd>

<space/>

<wd l="6398" t="6264" r="6566" b="6418">in</wd>

<space/>

<wd l="6643" t="6264" r="7291" b="6422">Section</wd>

<space/>

<wd l="7363" t="6269" r="7507" b="6422">5.</wd>

<space/>

<wd l="7584" t="6264" r="8256" b="6466">Finally,</wd>

<space/>

<wd l="8338" t="6264" r="8986" b="6422">Section</wd>

<space/>

<wd l="9062" t="6264" r="9158" b="6422">6</wd>

<space/>

<wd l="9230" t="6264" r="10099" b="6422">concludes</wd>

<space/>

<wd l="10171" t="6264" r="10483" b="6422">this</wd>

<space/>

</ln>

<ln l="6115" t="6518" r="9672" b="6720" baseLine="6667" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6115" t="6566" r="6614" b="6720">paper</wd>

<space/>

<wd l="6667" t="6518" r="7056" b="6677">with</wd>

<space/>

<wd l="7118" t="6566" r="7214" b="6677">a</wd>

<space/>

<wd l="7277" t="6566" r="8083" b="6720">summary</wd>

<space/>

<wd l="8141" t="6518" r="8458" b="6677">and</wd>

<space/>

<wd l="8515" t="6518" r="9034" b="6677">future</wd>

<space/>

<wd l="9091" t="6518" r="9672" b="6677">works.</wd>

</ln>

</para>

<para l="6125" t="6984" r="7939" b="7152" alignment="left" spaceBefore="210" lsp="exactly" lspExact="273" language="en">

<ln l="6125" t="6984" r="7939" b="7152" baseLine="7147" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="16">

<wd l="6125" t="6984" r="6230" b="7147">2</wd>

<space/>

<wd l="6552" t="6984" r="7334" b="7152">Related</wd>

<space/>

<wd l="7397" t="6984" r="7939" b="7152">work</wd>

</ln>

</para>

<para l="6115" t="7411" r="10493" b="10901" alignment="justified" spaceBefore="161" lsp="exactly" lspExact="252" language="en">

<ln l="6125" t="7411" r="10493" b="7613" baseLine="7560" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="7411" r="6466" b="7570">The</wd>

<space/>

<wd l="6523" t="7411" r="7742" b="7570">normalization</wd>

<space/>

<wd l="7805" t="7411" r="8784" b="7613">approaches</wd>

<space/>

<wd l="8851" t="7459" r="9154" b="7570">can</wd>

<space/>

<wd l="9206" t="7411" r="9413" b="7570">be</wd>

<space/>

<wd l="9480" t="7411" r="10493" b="7613">categorized</wd>

<space/>

</ln>

<ln l="6125" t="7666" r="10483" b="7867" baseLine="7814" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="7666" r="6461" b="7824">into</wd>

<space/>

<wd l="6528" t="7666" r="6893" b="7824">four</wd>

<space/>

<wd l="6955" t="7714" r="7594" b="7867">groups.</wd>

<space/>

<wd l="7666" t="7666" r="8002" b="7824">The</wd>

<space/>

<wd l="8069" t="7666" r="8419" b="7824">first</wd>

<space/>

<wd l="8482" t="7714" r="8986" b="7867">group</wd>

<space/>

<wd l="9053" t="7666" r="9192" b="7824">is</wd>

<space/>

<wd l="9259" t="7666" r="9787" b="7824">called</wd>

<space/>

<wd l="9850" t="7666" r="10483" b="7824">statisti-</wd>

</ln>

<ln l="6125" t="7915" r="10493" b="8117" baseLine="8069" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="7915" r="6374" b="8074">cal</wd>

<space/>

<wd l="6514" t="7915" r="7258" b="8074">machine</wd>

<space/>

<wd l="7397" t="7915" r="8328" b="8074">translation</wd>

<space/>

<wd l="8467" t="7915" r="9053" b="8117">(SMT)</wd>

<space/>

<wd l="9192" t="7915" r="10027" b="8117">paradigm</wd>

<space/>

<wd l="10157" t="7915" r="10493" b="8074">that</wd>

<space/>

</ln>

<ln l="6125" t="8170" r="10483" b="8371" baseLine="8323" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="8170" r="6960" b="8328">addresses</wd>

<space/>

<wd l="7027" t="8170" r="7296" b="8328">the</wd>

<space/>

<wd l="7358" t="8170" r="8578" b="8328">normalization</wd>

<space/>

<wd l="8635" t="8170" r="9379" b="8371">problem</wd>

<space/>

<wd l="9442" t="8218" r="9619" b="8328">as</wd>

<space/>

<wd l="9686" t="8218" r="9782" b="8328">a</wd>

<space/>

<wd l="9854" t="8170" r="10483" b="8328">statisti-</wd>

</ln>

<ln l="6125" t="8424" r="10483" b="8626" baseLine="8573" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="8424" r="6374" b="8582">cal</wd>

<space/>

<wd l="6456" t="8424" r="7200" b="8582">machine</wd>

<space/>

<wd l="7282" t="8424" r="8208" b="8582">translation</wd>

<space/>

<wd l="8285" t="8424" r="8678" b="8582">task.</wd>

<space/>

<wd l="8774" t="8424" r="9158" b="8582">This</wd>

<space/>

<wd l="9235" t="8424" r="10070" b="8626">paradigm</wd>

<space/>

<wd l="10142" t="8472" r="10483" b="8582">was</wd>

<space/>

</ln>

<ln l="6125" t="8674" r="10488" b="8875" baseLine="8827" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="8674" r="6480" b="8832">first</wd>

<space/>

<wd l="6610" t="8674" r="7550" b="8832">introduced</wd>

<space/>

<wd l="7666" t="8674" r="7891" b="8875">by</wd>

<space/>

<wd l="8016" t="8674" r="8381" b="8866">Aw,</wd>

<space/>

<wd l="8515" t="8674" r="9120" b="8875">Zhang,</wd>

<space/>

<wd l="9254" t="8674" r="9682" b="8832">Xiao</wd>

<space/>

<wd l="9816" t="8674" r="10138" b="8832">and</wd>

<space/>

<wd l="10267" t="8674" r="10488" b="8832">Su</wd>

<space/>

</ln>

<ln l="6125" t="8928" r="10493" b="9130" baseLine="9082" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="8928" r="6701" b="9130">(2006)</wd>

<space/>

<wd l="6806" t="8947" r="6974" b="9086">to</wd>

<space/>

<wd l="7080" t="8928" r="7958" b="9086">normalize</wd>

<space/>

<wd l="8069" t="8928" r="8496" b="9086">SMS</wd>

<space/>

<wd l="8602" t="8947" r="8933" b="9086">text</wd>

<space/>

<wd l="9034" t="8928" r="9365" b="9086">that</wd>

<space/>

<wd l="9461" t="8928" r="10291" b="9086">translates</wd>

<space/>

<wd l="10397" t="8976" r="10493" b="9086">a</wd>

<space/>

</ln>

<ln l="6130" t="9182" r="10488" b="9384" baseLine="9331" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="9230" r="6696" b="9341">source</wd>

<space/>

<wd l="6840" t="9182" r="7622" b="9384">language</wd>

<space/>

<wd l="7766" t="9182" r="8366" b="9384">(UGC)</wd>

<space/>

<wd l="8510" t="9202" r="8678" b="9341">to</wd>

<space/>

<wd l="8827" t="9230" r="8923" b="9341">a</wd>

<space/>

<wd l="9058" t="9202" r="9562" b="9384">target</wd>

<space/>

<wd l="9701" t="9182" r="10488" b="9384">language</wd>

<space/>

</ln>

<ln l="6125" t="9437" r="10488" b="9638" baseLine="9586" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="9437" r="6946" b="9638">(standard</wd>

<space/>

<wd l="7075" t="9437" r="7982" b="9638">language).</wd>

<space/>

<wd l="8131" t="9437" r="8515" b="9595">This</wd>

<space/>

<wd l="8645" t="9437" r="9485" b="9638">paradigm</wd>

<space/>

<wd l="9610" t="9437" r="9902" b="9595">has</wd>

<space/>

<wd l="10046" t="9437" r="10488" b="9595">since</wd>

<space/>

</ln>

<ln l="6115" t="9686" r="10488" b="9888" baseLine="9840" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6115" t="9686" r="6538" b="9845">been</wd>

<space/>

<wd l="6653" t="9686" r="7800" b="9878">re-examined,</wd>

<space/>

<wd l="7925" t="9686" r="8770" b="9888">expanded</wd>

<space/>

<wd l="8885" t="9686" r="9202" b="9845">and</wd>

<space/>

<wd l="9317" t="9686" r="10157" b="9888">improved</wd>

<space/>

<wd l="10262" t="9686" r="10488" b="9888">by</wd>

<space/>

</ln>

<ln l="6125" t="9941" r="10483" b="10142" baseLine="10090" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="9941" r="6576" b="10099">other</wd>

<space/>

<wd l="6662" t="9941" r="7646" b="10099">researchers</wd>

<space/>

<wd l="7742" t="9941" r="8357" b="10142">(Lopez</wd>

<space/>

<wd l="8443" t="9941" r="9154" b="10133">Ludeña,</wd>

<space/>

<wd l="9259" t="9941" r="9571" b="10099">San</wd>

<space/>

<wd l="9672" t="9941" r="10483" b="10142">Segundo,</wd>

<space/>

</ln>

<ln l="6120" t="10195" r="10493" b="10397" baseLine="10344" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6120" t="10200" r="6931" b="10387">Montero,</wd>

<space/>

<wd l="7018" t="10200" r="7507" b="10354">Barra</wd>

<space/>

<wd l="7594" t="10195" r="8323" b="10387">Chicote,</wd>

<space/>

<wd l="8414" t="10195" r="8578" b="10354">&amp;</wd>

<space/>

<wd l="8659" t="10200" r="9442" b="10387">Lorenzo,</wd>

<space/>

<wd l="9538" t="10195" r="10090" b="10397">2012).</wd>

<space/>

<wd l="10186" t="10200" r="10493" b="10354">For</wd>

<space/>

</ln>

<ln l="6125" t="10445" r="10488" b="10646" baseLine="10598" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="10445" r="6917" b="10646">example,</wd>

<space/>

<wd l="7008" t="10445" r="7939" b="10603">Kaufmann</wd>

<space/>

<wd l="8030" t="10445" r="8347" b="10603">and</wd>

<space/>

<wd l="8429" t="10445" r="8971" b="10603">Kalita</wd>

<space/>

<wd l="9062" t="10445" r="9638" b="10646">(2010)</wd>

<space/>

<wd l="9730" t="10445" r="10138" b="10603">used</wd>

<space/>

<wd l="10219" t="10445" r="10488" b="10603">the</wd>

<space/>

</ln>

<ln l="6130" t="10699" r="10464" b="10901" baseLine="10853" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="10699" r="6974" b="10858">SMT-like</wd>

<space/>

<wd l="7037" t="10699" r="7838" b="10901">approach</wd>

<space/>

<wd l="7896" t="10718" r="8064" b="10858">to</wd>

<space/>

<wd l="8122" t="10699" r="8995" b="10858">normalize</wd>

<space/>

<wd l="9053" t="10699" r="9730" b="10901">English</wd>

<space/>

<wd l="9787" t="10704" r="10464" b="10858">Tweets.</wd>

</ln>

</para>

<para l="6120" t="10954" r="10498" b="15202" alignment="justified" spaceAfter="185" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="6355" t="10954" r="10498" b="11155" baseLine="11102" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="10958" r="6590" b="11112">To</wd>

<space/>

<wd l="6734" t="10954" r="7608" b="11112">normalize</wd>

<space/>

<wd l="7757" t="10954" r="8184" b="11112">SMS</wd>

<space/>

<wd l="8333" t="10954" r="9168" b="11155">language,</wd>

<space/>

<wd l="9317" t="11002" r="9413" b="11112">a</wd>

<space/>

<wd l="9557" t="10954" r="10498" b="11155">supervised</wd>

<space/>

</ln>

<ln l="6120" t="11208" r="10488" b="11410" baseLine="11357" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="11208" r="6600" b="11410">noisy</wd>

<space/>

<wd l="6835" t="11208" r="7512" b="11366">channel</wd>

<space/>

<wd l="7747" t="11208" r="8290" b="11366">model</wd>

<space/>

<wd l="8525" t="11256" r="8861" b="11366">was</wd>

<space/>

<wd l="9101" t="11208" r="10046" b="11366">introduced</wd>

<space/>

<wd l="10262" t="11208" r="10488" b="11410">by</wd>

<space/>

</ln>

<ln l="6125" t="11458" r="10483" b="11659" baseLine="11611" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="11458" r="7162" b="11659">Choudhury,</wd>

<space/>

<wd l="7248" t="11458" r="7752" b="11650">Saraf,</wd>

<space/>

<wd l="7824" t="11458" r="8227" b="11650">Jain,</wd>

<space/>

<wd l="8314" t="11458" r="8928" b="11650">Sarkar,</wd>

<space/>

<wd l="9010" t="11458" r="9326" b="11616">and</wd>

<space/>

<wd l="9394" t="11462" r="9830" b="11616">Basu</wd>

<space/>

<wd l="9907" t="11458" r="10483" b="11659">(2007)</wd>

<space/>

</ln>

<ln l="6120" t="11712" r="10483" b="11914" baseLine="11861" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="11712" r="6451" b="11870">that</wd>

<space/>

<wd l="6533" t="11712" r="6941" b="11870">used</wd>

<space/>

<wd l="7018" t="11760" r="7114" b="11870">a</wd>

<space/>

<wd l="7190" t="11712" r="7786" b="11870">hidden</wd>

<space/>

<wd l="7862" t="11712" r="8563" b="11870">Markov</wd>

<space/>

<wd l="8640" t="11712" r="9182" b="11870">model</wd>

<space/>

<wd l="9274" t="11712" r="10008" b="11914">(HMM).</wd>

<space/>

<wd l="10099" t="11712" r="10483" b="11870">This</wd>

<space/>

</ln>

<ln l="6125" t="11966" r="10488" b="12168" baseLine="12115" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="11966" r="6931" b="12168">approach</wd>

<space/>

<wd l="7056" t="11966" r="7694" b="12125">mimics</wd>

<space/>

<wd l="7824" t="11966" r="8093" b="12125">the</wd>

<space/>

<wd l="8227" t="11966" r="8630" b="12168">spell</wd>

<space/>

<wd l="8765" t="11966" r="9552" b="12168">checking</wd>

<space/>

<wd l="9677" t="11966" r="10037" b="12125">task</wd>

<space/>

<wd l="10157" t="11966" r="10488" b="12125">that</wd>

<space/>

</ln>

<ln l="6120" t="12216" r="10488" b="12418" baseLine="12365" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="12216" r="6499" b="12374">tries</wd>

<space/>

<wd l="6624" t="12235" r="6792" b="12374">to</wd>

<space/>

<wd l="6917" t="12216" r="7502" b="12374">handle</wd>

<space/>

<wd l="7627" t="12216" r="7896" b="12374">the</wd>

<space/>

<wd l="8021" t="12216" r="9245" b="12374">normalization</wd>

<space/>

<wd l="9360" t="12216" r="10099" b="12418">problem</wd>

<space/>

<wd l="10219" t="12216" r="10488" b="12374">via</wd>

<space/>

</ln>

<ln l="6120" t="12470" r="10483" b="12672" baseLine="12619" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="12470" r="6600" b="12672">noisy</wd>

<space/>

<wd l="6667" t="12470" r="7344" b="12629">channel</wd>

<space/>

<wd l="7411" t="12470" r="8045" b="12629">models</wd>

<space/>

<wd l="8112" t="12470" r="8443" b="12629">that</wd>

<space/>

<wd l="8515" t="12470" r="8986" b="12672">study</wd>

<space/>

<wd l="9048" t="12470" r="9317" b="12629">the</wd>

<space/>

<wd l="9379" t="12470" r="9840" b="12629">UGC</wd>

<space/>

<wd l="9907" t="12490" r="10238" b="12629">text</wd>

<space/>

<wd l="10310" t="12518" r="10483" b="12629">as</wd>

<space/>

</ln>

<ln l="6125" t="12725" r="10483" b="12926" baseLine="12874" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="12773" r="6221" b="12883">a</wd>

<space/>

<wd l="6293" t="12725" r="6773" b="12926">noisy</wd>

<space/>

<wd l="6845" t="12725" r="7493" b="12883">version</wd>

<space/>

<wd l="7574" t="12725" r="7776" b="12883">of</wd>

<space/>

<wd l="7838" t="12725" r="8578" b="12883">standard</wd>

<space/>

<wd l="8650" t="12725" r="9485" b="12926">language.</wd>

<space/>

<wd l="9576" t="12725" r="9960" b="12883">This</wd>

<space/>

<wd l="10032" t="12773" r="10483" b="12926">para-</wd>

</ln>

<ln l="6125" t="12974" r="10493" b="13176" baseLine="13123" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="12974" r="6576" b="13176">digm</wd>

<space/>

<wd l="6629" t="12974" r="6922" b="13133">has</wd>

<space/>

<wd l="6979" t="12974" r="7402" b="13133">been</wd>

<space/>

<wd l="7474" t="12974" r="8434" b="13133">scrutinized</wd>

<space/>

<wd l="8491" t="12974" r="8813" b="13133">and</wd>

<space/>

<wd l="8870" t="12974" r="9701" b="13133">enhanced</wd>

<space/>

<wd l="9754" t="12974" r="9979" b="13176">by</wd>

<space/>

<wd l="10042" t="12974" r="10493" b="13133">other</wd>

<space/>

</ln>

<ln l="6120" t="13229" r="10488" b="13430" baseLine="13378" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="13229" r="7109" b="13387">researchers</wd>

<space/>

<wd l="7272" t="13229" r="7646" b="13430">(Liu</wd>

<space/>

<wd l="7810" t="13248" r="7963" b="13387">et</wd>

<space/>

<wd l="8122" t="13229" r="8381" b="13421">al.,</wd>

<space/>

<wd l="8544" t="13229" r="9029" b="13421">2011;</wd>

<space/>

<wd l="9197" t="13234" r="9614" b="13421">Xue,</wd>

<space/>

<wd l="9778" t="13229" r="10157" b="13421">Yin,</wd>

<space/>

<wd l="10325" t="13229" r="10488" b="13387">&amp;</wd>

<space/>

</ln>

<ln l="6120" t="13483" r="10493" b="13685" baseLine="13632" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="13483" r="6902" b="13675">Davison,</wd>

<space/>

<wd l="7118" t="13483" r="7766" b="13685">2011a).</wd>

<space/>

<wd l="7982" t="13488" r="8290" b="13642">For</wd>

<space/>

<wd l="8491" t="13483" r="9283" b="13685">example,</wd>

<space/>

<wd l="9494" t="13483" r="9974" b="13642">Cook</wd>

<space/>

<wd l="10171" t="13483" r="10493" b="13642">and</wd>

<space/>

</ln>

<ln l="6130" t="13738" r="10483" b="13939" baseLine="13886" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="13738" r="7027" b="13896">Stevenson</wd>

<space/>

<wd l="7114" t="13738" r="7690" b="13939">(2009)</wd>

<space/>

<wd l="7781" t="13738" r="8582" b="13896">modified</wd>

<space/>

<wd l="8659" t="13738" r="8971" b="13896">this</wd>

<space/>

<wd l="9067" t="13738" r="9869" b="13939">approach</wd>

<space/>

<wd l="9950" t="13757" r="10118" b="13896">to</wd>

<space/>

<wd l="10210" t="13738" r="10483" b="13896">de-</wd>

</ln>

<ln l="6130" t="13987" r="10483" b="14189" baseLine="14136" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="13987" r="6490" b="14189">sign</wd>

<space/>

<wd l="6571" t="14035" r="6778" b="14146">an</wd>

<space/>

<wd l="6854" t="13987" r="8021" b="14189">unsupervised</wd>

<space/>

<wd l="8093" t="13987" r="8760" b="14146">method</wd>

<space/>

<wd l="8832" t="13987" r="9312" b="14189">using</wd>

<space/>

<wd l="9384" t="13987" r="10483" b="14189">probabilistic</wd>

<space/>

</ln>

<ln l="6120" t="14237" r="10488" b="14438" baseLine="14390" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="14237" r="6754" b="14395">models</wd>

<space/>

<wd l="6917" t="14237" r="7171" b="14395">for</wd>

<space/>

<wd l="7330" t="14237" r="7718" b="14438">only</wd>

<space/>

<wd l="7867" t="14237" r="8309" b="14395">three</wd>

<space/>

<wd l="8467" t="14285" r="9230" b="14395">common</wd>

<space/>

<wd l="9389" t="14237" r="10488" b="14395">abbreviation</wd>

<space/>

</ln>

<ln l="6120" t="14491" r="10493" b="14693" baseLine="14645" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="14510" r="6629" b="14693">types:</wd>

<space/>

<wd l="6797" t="14491" r="7464" b="14693">stylistic</wd>

<space/>

<wd l="7613" t="14491" r="8443" b="14683">variation,</wd>

<space/>

<wd l="8582" t="14491" r="9115" b="14693">prefix</wd>

<space/>

<wd l="9259" t="14491" r="10022" b="14693">clipping,</wd>

<space/>

<wd l="10171" t="14491" r="10493" b="14650">and</wd>

<space/>

</ln>

<ln l="6130" t="14746" r="10483" b="14947" baseLine="14894" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="14746" r="7234" b="14947">subsequence</wd>

<space/>

<wd l="7320" t="14746" r="8458" b="14904">abbreviation.</wd>

<space/>

<wd l="8549" t="14750" r="8726" b="14899">In</wd>

<space/>

<wd l="8808" t="14746" r="9581" b="14938">addition,</wd>

<space/>

<wd l="9662" t="14746" r="10483" b="14938">Beaufort,</wd>

<space/>

</ln>

<ln l="6120" t="15000" r="10493" b="15202" baseLine="15149" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="15000" r="7013" b="15192">Roekhaut,</wd>

<space/>

<wd l="7118" t="15000" r="7968" b="15202">Cougnon,</wd>

<space/>

<wd l="8074" t="15000" r="8395" b="15158">and</wd>

<space/>

<wd l="8477" t="15000" r="9053" b="15158">Fairon</wd>

<space/>

<wd l="9154" t="15000" r="9730" b="15202">(2010)</wd>

<space/>

<wd l="9830" t="15000" r="10493" b="15202">merged</wd>

</ln>

</para>

</column>

</section>

<dd l="1409" t="15736" r="10502" b="15977">

<para l="5804" t="15792" r="6148" b="15946" alignment="centered" spaceBefore="4" lsp="exactly" lspExact="229" language="en">

<ln l="5870" t="15792" r="6082" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="41">

<wd l="5870" t="15792" r="6082" b="15946">20</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4303.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1401" marginTop="1417" marginRight="1392" marginBottom="1292" offsetX="-6" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1401" t="1417" r="10517" b="15380">

<column l="1401" t="1417" r="5803" b="15380">

<para l="1416" t="1464" r="5779" b="1872" alignment="justified" spaceBefore="2" lsp="exactly" lspExact="252" language="en">

<ln l="1416" t="1464" r="5779" b="1666" baseLine="1613" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="1464" r="1685" b="1622">the</wd>

<space/>

<wd l="1776" t="1464" r="2616" b="1622">SMT-like</wd>

<space/>

<wd l="2698" t="1464" r="3019" b="1622">and</wd>

<space/>

<wd l="3086" t="1464" r="3355" b="1622">the</wd>

<space/>

<wd l="3442" t="1464" r="3845" b="1666">spell</wd>

<space/>

<wd l="3926" t="1464" r="4718" b="1666">checking</wd>

<space/>

<wd l="4800" t="1464" r="5779" b="1666">approaches</wd>

<space/>

</ln>

<ln l="1416" t="1714" r="3816" b="1872" baseLine="1867" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="1733" r="1584" b="1872">to</wd>

<space/>

<wd l="1646" t="1714" r="2520" b="1872">normalize</wd>

<space/>

<wd l="2578" t="1714" r="3192" b="1872">French</wd>

<space/>

<wd l="3254" t="1714" r="3816" b="1872">SMSs.</wd>

</ln>

</para>

<para l="1411" t="1968" r="5789" b="6470" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="1651" t="1968" r="5779" b="2170" baseLine="2122" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1651" t="1968" r="1987" b="2126">The</wd>

<space/>

<wd l="2088" t="1968" r="2510" b="2126">third</wd>

<space/>

<wd l="2606" t="2016" r="3110" b="2170">group</wd>

<space/>

<wd l="3221" t="1968" r="3355" b="2126">is</wd>

<space/>

<wd l="3461" t="1968" r="3730" b="2126">the</wd>

<space/>

<wd l="3835" t="1968" r="4723" b="2170">dictionary</wd>

<space/>

<wd l="4814" t="1968" r="5328" b="2126">based</wd>

<space/>

<wd l="5419" t="2016" r="5779" b="2126">nor-</wd>

</ln>

<ln l="1416" t="2222" r="5789" b="2424" baseLine="2371" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="2222" r="2347" b="2381">malization</wd>

<space/>

<wd l="2414" t="2222" r="3264" b="2424">approach,</wd>

<space/>

<wd l="3331" t="2222" r="3869" b="2381">which</wd>

<space/>

<wd l="3931" t="2222" r="4070" b="2381">is</wd>

<space/>

<wd l="4138" t="2270" r="4344" b="2381">an</wd>

<space/>

<wd l="4406" t="2242" r="5400" b="2424">easy-to-use</wd>

<space/>

<wd l="5467" t="2222" r="5789" b="2381">and</wd>

<space/>

</ln>

<ln l="1421" t="2472" r="5784" b="2674" baseLine="2626" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="2472" r="1738" b="2630">fast</wd>

<space/>

<wd l="1810" t="2472" r="2554" b="2630">solution.</wd>

<space/>

<wd l="2635" t="2472" r="3019" b="2630">This</wd>

<space/>

<wd l="3091" t="2472" r="3893" b="2674">approach</wd>

<space/>

<wd l="3955" t="2472" r="4661" b="2674">requires</wd>

<space/>

<wd l="4738" t="2520" r="4834" b="2630">a</wd>

<space/>

<wd l="4896" t="2472" r="5784" b="2674">dictionary</wd>

<space/>

</ln>

<ln l="1416" t="2726" r="5774" b="2928" baseLine="2880" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="2726" r="1978" b="2885">whose</wd>

<space/>

<wd l="2059" t="2726" r="2635" b="2885">entries</wd>

<space/>

<wd l="2717" t="2774" r="2976" b="2885">are</wd>

<space/>

<wd l="3053" t="2726" r="3523" b="2885">OOV</wd>

<space/>

<wd l="3600" t="2726" r="3917" b="2885">and</wd>

<space/>

<wd l="3994" t="2726" r="4738" b="2885">standard</wd>

<space/>

<wd l="4805" t="2726" r="5232" b="2885">form</wd>

<space/>

<wd l="5294" t="2726" r="5774" b="2928">pairs.</wd>

<space/>

</ln>

<ln l="1421" t="2981" r="5779" b="3182" baseLine="3130" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="2986" r="1550" b="3139">It</wd>

<space/>

<wd l="1646" t="2981" r="1939" b="3139">has</wd>

<space/>

<wd l="2035" t="2981" r="2458" b="3139">been</wd>

<space/>

<wd l="2554" t="3029" r="3168" b="3182">proven</wd>

<space/>

<wd l="3264" t="2981" r="3595" b="3139">that</wd>

<space/>

<wd l="3696" t="2981" r="4171" b="3182">using</wd>

<space/>

<wd l="4272" t="3029" r="4368" b="3139">a</wd>

<space/>

<wd l="4469" t="2981" r="5342" b="3182">colloquial</wd>

<space/>

<wd l="5453" t="2981" r="5779" b="3139">dic-</wd>

</ln>

<ln l="1416" t="3235" r="5789" b="3437" baseLine="3384" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="3235" r="2045" b="3437">tionary</wd>

<space/>

<wd l="2117" t="3283" r="2414" b="3394">can</wd>

<space/>

<wd l="2496" t="3235" r="3485" b="3437">outperform</wd>

<space/>

<wd l="3562" t="3283" r="4008" b="3394">some</wd>

<space/>

<wd l="4094" t="3235" r="5395" b="3394">state-of-the-art</wd>

<space/>

<wd l="5472" t="3235" r="5789" b="3394">and</wd>

<space/>

</ln>

<ln l="1421" t="3485" r="5770" b="3686" baseLine="3638" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="3485" r="2174" b="3686">complex</wd>

<space/>

<wd l="2347" t="3485" r="3326" b="3686">approaches</wd>

<space/>

<wd l="3499" t="3485" r="4066" b="3686">(Clark</wd>

<space/>

<wd l="4229" t="3485" r="4392" b="3643">&amp;</wd>

<space/>

<wd l="4560" t="3485" r="5107" b="3677">Araki,</wd>

<space/>

<wd l="5285" t="3485" r="5770" b="3677">2011;</wd>

<space/>

</ln>

<ln l="1426" t="3739" r="5779" b="3941" baseLine="3893" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="3739" r="2030" b="3931">Saloot,</wd>

<space/>

<wd l="2146" t="3739" r="2592" b="3931">Idris,</wd>

<space/>

<wd l="2712" t="3739" r="2875" b="3898">&amp;</wd>

<space/>

<wd l="2981" t="3739" r="3821" b="3931">Mahmud,</wd>

<space/>

<wd l="3941" t="3739" r="4493" b="3941">2014).</wd>

<space/>

<wd l="4613" t="3744" r="5462" b="3931">However,</wd>

<space/>

<wd l="5582" t="3739" r="5779" b="3898">its</wd>

<space/>

</ln>

<ln l="1411" t="3994" r="5779" b="4195" baseLine="4142" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1411" t="3994" r="2530" b="4195">performance</wd>

<space/>

<wd l="2606" t="3994" r="3168" b="4195">highly</wd>

<space/>

<wd l="3240" t="3994" r="3710" b="4152">relies</wd>

<space/>

<wd l="3792" t="4042" r="4008" b="4152">on</wd>

<space/>

<wd l="4085" t="3994" r="4349" b="4152">the</wd>

<space/>

<wd l="4435" t="3994" r="4766" b="4152">size</wd>

<space/>

<wd l="4848" t="3994" r="5050" b="4152">of</wd>

<space/>

<wd l="5098" t="3994" r="5366" b="4152">the</wd>

<space/>

<wd l="5448" t="3994" r="5779" b="4152">dic-</wd>

</ln>

<ln l="1416" t="4243" r="5784" b="4445" baseLine="4397" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="4243" r="2083" b="4445">tionary.</wd>

<space/>

<wd l="2251" t="4243" r="3163" b="4435">Therefore,</wd>

<space/>

<wd l="3317" t="4248" r="3734" b="4435">Han,</wd>

<space/>

<wd l="3893" t="4243" r="4411" b="4435">Cook,</wd>

<space/>

<wd l="4570" t="4243" r="4891" b="4402">and</wd>

<space/>

<wd l="5035" t="4243" r="5784" b="4402">Baldwin</wd>

<space/>

</ln>

<ln l="1421" t="4498" r="5789" b="4699" baseLine="4651" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="4498" r="1997" b="4699">(2012)</wd>

<space/>

<wd l="2150" t="4498" r="3096" b="4656">introduced</wd>

<space/>

<wd l="3235" t="4546" r="3331" b="4656">a</wd>

<space/>

<wd l="3470" t="4498" r="4138" b="4656">method</wd>

<space/>

<wd l="4272" t="4517" r="4440" b="4656">to</wd>

<space/>

<wd l="4594" t="4498" r="5789" b="4699">automatically</wd>

<space/>

</ln>

<ln l="1421" t="4752" r="5779" b="4954" baseLine="4901" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="4752" r="2126" b="4954">compile</wd>

<space/>

<wd l="2203" t="4800" r="2299" b="4910">a</wd>

<space/>

<wd l="2366" t="4752" r="2803" b="4954">large</wd>

<space/>

<wd l="2880" t="4752" r="3806" b="4954">dictionary.</wd>

<space/>

<wd l="3893" t="4757" r="4133" b="4910">To</wd>

<space/>

<wd l="4210" t="4752" r="4862" b="4910">address</wd>

<space/>

<wd l="4934" t="4752" r="5203" b="4910">the</wd>

<space/>

<wd l="5280" t="4752" r="5779" b="4910">short-</wd>

</ln>

<ln l="1421" t="5006" r="5774" b="5208" baseLine="5155" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="5006" r="2155" b="5208">comings</wd>

<space/>

<wd l="2342" t="5006" r="2544" b="5165">of</wd>

<space/>

<wd l="2698" t="5006" r="2962" b="5165">the</wd>

<space/>

<wd l="3144" t="5006" r="4032" b="5208">dictionary</wd>

<space/>

<wd l="4210" t="5006" r="5059" b="5208">approach,</wd>

<space/>

<wd l="5246" t="5006" r="5774" b="5198">Oliva,</wd>

<space/>

</ln>

<ln l="1426" t="5256" r="5779" b="5458" baseLine="5410" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="5256" r="2150" b="5448">Serrano,</wd>

<space/>

<wd l="2251" t="5256" r="2568" b="5414">Del</wd>

<space/>

<wd l="2669" t="5256" r="3398" b="5448">Castillo,</wd>

<space/>

<wd l="3504" t="5256" r="3821" b="5414">and</wd>

<space/>

<wd l="3917" t="5256" r="4517" b="5458">Igesias</wd>

<space/>

<wd l="4622" t="5256" r="5194" b="5458">(2013)</wd>

<space/>

<wd l="5299" t="5256" r="5779" b="5414">intro-</wd>

</ln>

<ln l="1421" t="5510" r="5784" b="5712" baseLine="5659" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="5510" r="1949" b="5669">duced</wd>

<space/>

<wd l="2040" t="5558" r="2136" b="5669">a</wd>

<space/>

<wd l="2237" t="5510" r="2837" b="5712">special</wd>

<space/>

<wd l="2942" t="5510" r="3629" b="5712">Spanish</wd>

<space/>

<wd l="3720" t="5510" r="4474" b="5712">phonetic</wd>

<space/>

<wd l="4579" t="5510" r="5510" b="5712">dictionary,</wd>

<space/>

<wd l="5616" t="5510" r="5784" b="5664">in</wd>

<space/>

</ln>

<ln l="1416" t="5765" r="5784" b="5966" baseLine="5914" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="5765" r="1958" b="5923">which</wd>

<space/>

<wd l="2016" t="5765" r="2419" b="5923">each</wd>

<space/>

<wd l="2477" t="5784" r="2928" b="5966">entry</wd>

<space/>

<wd l="2986" t="5765" r="3125" b="5923">is</wd>

<space/>

<wd l="3192" t="5765" r="3826" b="5923">formed</wd>

<space/>

<wd l="3874" t="5765" r="4099" b="5966">by</wd>

<space/>

<wd l="4157" t="5813" r="4253" b="5923">a</wd>

<space/>

<wd l="4315" t="5765" r="4838" b="5923">coded</wd>

<space/>

<wd l="4896" t="5784" r="5784" b="5923">consonant</wd>

<space/>

</ln>

<ln l="1426" t="6014" r="5784" b="6216" baseLine="6163" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="6014" r="1968" b="6216">string,</wd>

<space/>

<wd l="2059" t="6014" r="2678" b="6173">vowels</wd>

<space/>

<wd l="2774" t="6014" r="3403" b="6216">strings,</wd>

<space/>

<wd l="3499" t="6014" r="3816" b="6173">and</wd>

<space/>

<wd l="3893" t="6014" r="4301" b="6173">their</wd>

<space/>

<wd l="4378" t="6014" r="5170" b="6216">positions</wd>

<space/>

<wd l="5266" t="6014" r="5434" b="6168">in</wd>

<space/>

<wd l="5515" t="6014" r="5784" b="6173">the</wd>

<space/>

</ln>

<ln l="1416" t="6269" r="5117" b="6470" baseLine="6418" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="6269" r="1920" b="6461">word,</wd>

<space/>

<wd l="1987" t="6269" r="2242" b="6427">for</wd>

<space/>

<wd l="2294" t="6269" r="3355" b="6470">normalizing</wd>

<space/>

<wd l="3418" t="6269" r="4109" b="6470">Spanish</wd>

<space/>

<wd l="4171" t="6269" r="4594" b="6427">SMS</wd>

<space/>

<wd l="4656" t="6288" r="5117" b="6427">texts.</wd>

</ln>

</para>

<para l="1411" t="6523" r="5789" b="9965" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="1651" t="6523" r="5784" b="6725" baseLine="6672" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1651" t="6523" r="1987" b="6682">The</wd>

<space/>

<wd l="2074" t="6523" r="2606" b="6682">fourth</wd>

<space/>

<wd l="2693" t="6571" r="3197" b="6725">group</wd>

<space/>

<wd l="3278" t="6523" r="4152" b="6682">resembles</wd>

<space/>

<wd l="4243" t="6523" r="5098" b="6682">automatic</wd>

<space/>

<wd l="5194" t="6523" r="5784" b="6725">speech</wd>

<space/>

</ln>

<ln l="1416" t="6773" r="5779" b="6974" baseLine="6922" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1416" t="6773" r="2424" b="6974">recognition</wd>

<space/>

<wd l="2510" t="6773" r="3072" b="6974">(ASR)</wd>

<space/>

<wd l="3173" t="6792" r="3902" b="6974">systems.</wd>

<space/>

<wd l="4003" t="6773" r="4387" b="6931">This</wd>

<space/>

<wd l="4474" t="6773" r="5309" b="6974">paradigm</wd>

<space/>

<wd l="5395" t="6821" r="5779" b="6931">con-</wd>

</ln>

<ln l="1426" t="7027" r="5779" b="7229" baseLine="7176" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1426" t="7027" r="1795" b="7186">sists</wd>

<space/>

<wd l="1925" t="7027" r="2126" b="7186">of</wd>

<space/>

<wd l="2222" t="7027" r="2659" b="7186">three</wd>

<space/>

<wd l="2794" t="7046" r="3269" b="7229">steps:</wd>

<space/>

<wd l="3432" t="7027" r="3581" b="7229">1)</wd>

<space/>

<wd l="3715" t="7027" r="4656" b="7229">converting</wd>

<space/>

<wd l="4771" t="7027" r="5040" b="7186">the</wd>

<space/>

<wd l="5165" t="7046" r="5496" b="7186">text</wd>

<space/>

<wd l="5616" t="7046" r="5779" b="7186">to</wd>

<space/>

</ln>

<ln l="1426" t="7282" r="5779" b="7483" baseLine="7430" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1426" t="7282" r="2002" b="7483">strings</wd>

<space/>

<wd l="2088" t="7282" r="2290" b="7440">of</wd>

<space/>

<wd l="2338" t="7282" r="3230" b="7483">phonemes</wd>

<space/>

<wd l="3312" t="7282" r="3581" b="7440">via</wd>

<space/>

<wd l="3662" t="7282" r="4963" b="7483">letter-to-phone</wd>

<space/>

<wd l="5040" t="7282" r="5520" b="7474">rules,</wd>

<space/>

<wd l="5606" t="7282" r="5779" b="7483">2)</wd>

<space/>

</ln>

<ln l="1421" t="7536" r="5789" b="7738" baseLine="7685" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1421" t="7536" r="2362" b="7738">converting</wd>

<space/>

<wd l="2434" t="7536" r="2702" b="7694">the</wd>

<space/>

<wd l="2784" t="7536" r="3355" b="7738">strings</wd>

<space/>

<wd l="3442" t="7536" r="3643" b="7694">of</wd>

<space/>

<wd l="3691" t="7536" r="4579" b="7738">phonemes</wd>

<space/>

<wd l="4661" t="7555" r="4829" b="7694">to</wd>

<space/>

<wd l="4906" t="7536" r="5438" b="7694">words</wd>

<space/>

<wd l="5515" t="7536" r="5789" b="7694">via</wd>

<space/>

</ln>

<ln l="1411" t="7786" r="5784" b="7987" baseLine="7934" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1411" t="7786" r="2640" b="7987">pronunciation</wd>

<space/>

<wd l="2746" t="7786" r="3816" b="7978">dictionaries,</wd>

<space/>

<wd l="3926" t="7786" r="4243" b="7944">and</wd>

<space/>

<wd l="4344" t="7786" r="4517" b="7987">3)</wd>

<space/>

<wd l="4627" t="7786" r="5414" b="7987">choosing</wd>

<space/>

<wd l="5515" t="7786" r="5784" b="7944">the</wd>

<space/>

</ln>

<ln l="1416" t="8040" r="5784" b="8242" baseLine="8189" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1416" t="8059" r="1843" b="8198">most</wd>

<space/>

<wd l="1958" t="8040" r="2736" b="8242">probable</wd>

<space/>

<wd l="2856" t="8040" r="3437" b="8198">words.</wd>

<space/>

<wd l="3571" t="8040" r="3907" b="8198">The</wd>

<space/>

<wd l="4027" t="8040" r="4853" b="8198">ASR-like</wd>

<space/>

<wd l="4982" t="8040" r="5784" b="8242">approach</wd>

<space/>

</ln>

<ln l="1416" t="8290" r="5784" b="8491" baseLine="8443" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1416" t="8290" r="1709" b="8448">has</wd>

<space/>

<wd l="1781" t="8290" r="2203" b="8448">been</wd>

<space/>

<wd l="2275" t="8290" r="2938" b="8491">merged</wd>

<space/>

<wd l="3010" t="8290" r="3398" b="8448">with</wd>

<space/>

<wd l="3480" t="8290" r="3926" b="8448">other</wd>

<space/>

<wd l="4003" t="8290" r="4987" b="8491">approaches</wd>

<space/>

<wd l="5064" t="8309" r="5232" b="8448">to</wd>

<space/>

<wd l="5304" t="8290" r="5784" b="8448">boost</wd>

<space/>

</ln>

<ln l="1421" t="8544" r="5779" b="8746" baseLine="8693" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1421" t="8544" r="1622" b="8702">its</wd>

<space/>

<wd l="1771" t="8544" r="2928" b="8746">performance.</wd>

<space/>

<wd l="3091" t="8544" r="3715" b="8736">Kobus,</wd>

<space/>

<wd l="3869" t="8549" r="4402" b="8736">Yvon,</wd>

<space/>

<wd l="4565" t="8544" r="4882" b="8702">and</wd>

<space/>

<wd l="5026" t="8544" r="5779" b="8702">Damnati</wd>

<space/>

</ln>

<ln l="1421" t="8798" r="5779" b="9000" baseLine="8947" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1421" t="8798" r="1997" b="9000">(2008)</wd>

<space/>

<wd l="2136" t="8798" r="3005" b="8957">combined</wd>

<space/>

<wd l="3125" t="8798" r="3950" b="8957">ASR-like</wd>

<space/>

<wd l="4085" t="8798" r="4402" b="8957">and</wd>

<space/>

<wd l="4531" t="8798" r="5376" b="8957">SMT-like</wd>

<space/>

<wd l="5506" t="8846" r="5779" b="9000">ap-</wd>

</ln>

<ln l="1411" t="9048" r="5779" b="9250" baseLine="9202" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1411" t="9048" r="2198" b="9250">proaches</wd>

<space/>

<wd l="2395" t="9067" r="2563" b="9206">to</wd>

<space/>

<wd l="2765" t="9048" r="3643" b="9206">normalize</wd>

<space/>

<wd l="3840" t="9048" r="4454" b="9206">French</wd>

<space/>

<wd l="4656" t="9048" r="5218" b="9206">SMSs.</wd>

<space/>

<wd l="5424" t="9048" r="5779" b="9240">Lin,</wd>

<space/>

</ln>

<ln l="1416" t="9302" r="5779" b="9504" baseLine="9451" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1416" t="9302" r="2088" b="9494">Bilmes,</wd>

<space/>

<wd l="2170" t="9302" r="2904" b="9504">Vergyri,</wd>

<space/>

<wd l="2995" t="9302" r="3312" b="9461">and</wd>

<space/>

<wd l="3379" t="9302" r="4272" b="9461">Kirchhoff</wd>

<space/>

<wd l="4330" t="9302" r="4906" b="9504">(2007)</wd>

<space/>

<wd l="4987" t="9302" r="5395" b="9461">used</wd>

<space/>

<wd l="5467" t="9302" r="5779" b="9461">this</wd>

<space/>

</ln>

<ln l="1421" t="9557" r="5789" b="9758" baseLine="9706" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1421" t="9557" r="2227" b="9758">approach</wd>

<space/>

<wd l="2323" t="9576" r="2491" b="9715">to</wd>

<space/>

<wd l="2602" t="9557" r="3125" b="9715">detect</wd>

<space/>

<wd l="3226" t="9557" r="3696" b="9715">OOV</wd>

<space/>

<wd l="3797" t="9557" r="4334" b="9715">words</wd>

<space/>

<wd l="4440" t="9557" r="4608" b="9710">in</wd>

<space/>

<wd l="4718" t="9557" r="5789" b="9715">switchboard</wd>

<space/>

</ln>

<ln l="1421" t="9806" r="1829" b="9965" baseLine="9960" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1421" t="9806" r="1829" b="9965">data.</wd>

</ln>

</para>

<para l="1416" t="10061" r="5789" b="13291" alignment="justified" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="1646" t="10061" r="5779" b="10262" baseLine="10214" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="10066" r="2006" b="10219">Han</wd>

<space/>

<wd l="2112" t="10061" r="2429" b="10219">and</wd>

<space/>

<wd l="2520" t="10061" r="3264" b="10219">Baldwin</wd>

<space/>

<wd l="3360" t="10061" r="3936" b="10262">(2011)</wd>

<space/>

<wd l="4042" t="10061" r="4920" b="10219">illustrated</wd>

<space/>

<wd l="5016" t="10109" r="5112" b="10219">a</wd>

<space/>

<wd l="5203" t="10061" r="5779" b="10219">lexical</wd>

<space/>

</ln>

<ln l="1416" t="10315" r="5789" b="10517" baseLine="10464" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="10315" r="2083" b="10474">method</wd>

<space/>

<wd l="2160" t="10315" r="2414" b="10474">for</wd>

<space/>

<wd l="2491" t="10315" r="3557" b="10517">normalizing</wd>

<space/>

<wd l="3634" t="10315" r="4282" b="10474">Twitter</wd>

<space/>

<wd l="4358" t="10363" r="5232" b="10517">messages.</wd>

<space/>

<wd l="5318" t="10315" r="5789" b="10474">After</wd>

<space/>

</ln>

<ln l="1421" t="10570" r="5779" b="10771" baseLine="10718" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="10570" r="2227" b="10771">detecting</wd>

<space/>

<wd l="2314" t="10570" r="2918" b="10762">OOVs,</wd>

<space/>

<wd l="3014" t="10570" r="3907" b="10728">ill-formed</wd>

<space/>

<wd l="3984" t="10570" r="4574" b="10762">words,</wd>

<space/>

<wd l="4666" t="10570" r="4987" b="10728">and</wd>

<space/>

<wd l="5069" t="10589" r="5779" b="10771">generat-</wd>

</ln>

<ln l="1421" t="10819" r="5779" b="11021" baseLine="10973" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="10819" r="1699" b="11021">ing</wd>

<space/>

<wd l="1781" t="10867" r="1877" b="10978">a</wd>

<space/>

<wd l="1963" t="10838" r="2198" b="10978">set</wd>

<space/>

<wd l="2285" t="10819" r="2482" b="10978">of</wd>

<space/>

<wd l="2544" t="10819" r="3518" b="11011">candidates,</wd>

<space/>

<wd l="3600" t="10819" r="3869" b="10978">the</wd>

<space/>

<wd l="3941" t="10819" r="4301" b="10978">best</wd>

<space/>

<wd l="4387" t="10819" r="5222" b="10978">candidate</wd>

<space/>

<wd l="5304" t="10819" r="5443" b="10978">is</wd>

<space/>

<wd l="5534" t="10867" r="5779" b="10978">se-</wd>

</ln>

<ln l="1421" t="11074" r="5779" b="11275" baseLine="11222" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="11074" r="1949" b="11232">lected</wd>

<space/>

<wd l="2011" t="11074" r="2486" b="11275">using</wd>

<space/>

<wd l="2554" t="11122" r="2650" b="11232">a</wd>

<space/>

<wd l="2712" t="11074" r="3326" b="11275">variety</wd>

<space/>

<wd l="3394" t="11074" r="3595" b="11232">of</wd>

<space/>

<wd l="3638" t="11074" r="4325" b="11232">metrics:</wd>

<space/>

<wd l="4411" t="11074" r="4992" b="11232">lexical</wd>

<space/>

<wd l="5064" t="11074" r="5390" b="11232">edit</wd>

<space/>

<wd l="5458" t="11074" r="5779" b="11232">dis-</wd>

</ln>

<ln l="1416" t="11328" r="5779" b="11530" baseLine="11477" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="11347" r="1930" b="11520">tance,</wd>

<space/>

<wd l="2026" t="11328" r="2890" b="11530">phonemic</wd>

<space/>

<wd l="2995" t="11328" r="3322" b="11486">edit</wd>

<space/>

<wd l="3422" t="11328" r="4186" b="11520">distance,</wd>

<space/>

<wd l="4291" t="11328" r="4920" b="11530">longest</wd>

<space/>

<wd l="5021" t="11376" r="5779" b="11486">common</wd>

<space/>

</ln>

<ln l="1426" t="11578" r="5779" b="11779" baseLine="11731" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="11578" r="2530" b="11779">subsequence</wd>

<space/>

<wd l="2678" t="11578" r="3274" b="11779">(LCS),</wd>

<space/>

<wd l="3427" t="11578" r="3840" b="11736">affix</wd>

<space/>

<wd l="3994" t="11578" r="4838" b="11779">substring,</wd>

<space/>

<wd l="4992" t="11578" r="5779" b="11779">language</wd>

<space/>

</ln>

<ln l="1416" t="11832" r="5779" b="12034" baseLine="11986" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="11832" r="2016" b="12024">model,</wd>

<space/>

<wd l="2174" t="11832" r="2496" b="11990">and</wd>

<space/>

<wd l="2645" t="11832" r="4272" b="12034">dependency-based</wd>

<space/>

<wd l="4421" t="11832" r="5294" b="12034">frequency</wd>

<space/>

<wd l="5448" t="11832" r="5779" b="11990">fea-</wd>

</ln>

<ln l="1416" t="12086" r="5784" b="12245" baseLine="12235" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="12106" r="1891" b="12245">tures.</wd>

<space/>

<wd l="1982" t="12086" r="2323" b="12245">The</wd>

<space/>

<wd l="2405" t="12086" r="3072" b="12245">method</wd>

<space/>

<wd l="3154" t="12086" r="3936" b="12245">achieved</wd>

<space/>

<wd l="4018" t="12134" r="4114" b="12245">a</wd>

<space/>

<wd l="4200" t="12086" r="4574" b="12245">93.4</wd>

<space/>

<wd l="4661" t="12091" r="5237" b="12245">BLEU</wd>

<space/>

<wd l="5328" t="12134" r="5784" b="12245">score</wd>

<space/>

</ln>

<ln l="1421" t="12336" r="5789" b="12538" baseLine="12490" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="12336" r="1589" b="12490">in</wd>

<space/>

<wd l="1646" t="12336" r="2712" b="12538">normalizing</wd>

<space/>

<wd l="2774" t="12336" r="3086" b="12494">549</wd>

<space/>

<wd l="3154" t="12336" r="3826" b="12538">English</wd>

<space/>

<wd l="3883" t="12341" r="4560" b="12494">Tweets.</wd>

<space/>

<wd l="4632" t="12336" r="5016" b="12494">This</wd>

<space/>

<wd l="5078" t="12336" r="5789" b="12538">inspired</wd>

<space/>

</ln>

<ln l="1416" t="12590" r="5779" b="12792" baseLine="12744" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="12638" r="1608" b="12749">us</wd>

<space/>

<wd l="1733" t="12610" r="1901" b="12749">to</wd>

<space/>

<wd l="2026" t="12590" r="2592" b="12792">design</wd>

<space/>

<wd l="2717" t="12638" r="2813" b="12749">a</wd>

<space/>

<wd l="2928" t="12590" r="4147" b="12749">normalization</wd>

<space/>

<wd l="4267" t="12590" r="4930" b="12749">method</wd>

<space/>

<wd l="5040" t="12590" r="5371" b="12749">that</wd>

<space/>

<wd l="5491" t="12590" r="5779" b="12749">has</wd>

<space/>

</ln>

<ln l="1416" t="12845" r="5779" b="13046" baseLine="12994" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="12845" r="1853" b="13003">three</wd>

<space/>

<wd l="1920" t="12845" r="2434" b="13046">major</wd>

<space/>

<wd l="2501" t="12864" r="3072" b="13046">stages:</wd>

<space/>

<wd l="3149" t="12845" r="4421" b="13046">preprocessing,</wd>

<space/>

<wd l="4493" t="12845" r="5333" b="13003">candidate</wd>

<space/>

<wd l="5400" t="12893" r="5779" b="13046">gen-</wd>

</ln>

<ln l="1421" t="13099" r="4234" b="13291" baseLine="13248" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="13099" r="2078" b="13291">eration,</wd>

<space/>

<wd l="2146" t="13099" r="2462" b="13258">and</wd>

<space/>

<wd l="2515" t="13099" r="3355" b="13258">candidate</wd>

<space/>

<wd l="3418" t="13099" r="4234" b="13258">selection.</wd>

</ln>

</para>

<para l="1416" t="13560" r="3288" b="13781" alignment="left" spaceBefore="206" lsp="exactly" lspExact="276" language="en">

<ln l="1416" t="13560" r="3288" b="13781" baseLine="13723" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="15">

<wd l="1416" t="13565" r="1526" b="13733">3</wd>

<space/>

<wd l="1853" t="13565" r="3288" b="13781">Preprocessing</wd>

</ln>

</para>

<para l="1416" t="13987" r="5789" b="15202" alignment="justified" spaceBefore="159" spaceAfter="167" lsp="exactly" lspExact="253" language="en">

<ln l="1416" t="13987" r="5779" b="14189" baseLine="14141" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="13987" r="1872" b="14179">First,</wd>

<space/>

<wd l="1934" t="14035" r="2189" b="14146">we</wd>

<space/>

<wd l="2242" t="13987" r="2957" b="14189">perform</wd>

<space/>

<wd l="3019" t="14035" r="3470" b="14146">some</wd>

<space/>

<wd l="3533" t="13987" r="4042" b="14146">initial</wd>

<space/>

<wd l="4099" t="14006" r="4430" b="14146">text</wd>

<space/>

<wd l="4488" t="13987" r="5184" b="14189">refining</wd>

<space/>

<wd l="5242" t="14035" r="5458" b="14146">on</wd>

<space/>

<wd l="5510" t="13987" r="5779" b="14146">the</wd>

<space/>

</ln>

<ln l="1416" t="14242" r="5784" b="14443" baseLine="14390" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="14261" r="2026" b="14400">tweets.</wd>

<space/>

<wd l="2194" t="14246" r="2501" b="14400">For</wd>

<space/>

<wd l="2659" t="14242" r="3446" b="14443">example,</wd>

<space/>

<wd l="3614" t="14242" r="4651" b="14400">consecutive</wd>

<space/>

<wd l="4810" t="14242" r="5784" b="14443">whitespace</wd>

<space/>

</ln>

<ln l="1421" t="14496" r="5789" b="14698" baseLine="14645" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="14496" r="2304" b="14654">characters</wd>

<space/>

<wd l="2386" t="14544" r="2645" b="14654">are</wd>

<space/>

<wd l="2717" t="14496" r="3466" b="14654">trimmed</wd>

<space/>

<wd l="3528" t="14515" r="3696" b="14654">to</wd>

<space/>

<wd l="3782" t="14496" r="4296" b="14698">single</wd>

<space/>

<wd l="4368" t="14496" r="5395" b="14698">whitespace,</wd>

<space/>

<wd l="5477" t="14496" r="5789" b="14654">and</wd>

<space/>

</ln>

<ln l="1421" t="14746" r="5774" b="14947" baseLine="14899" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="14765" r="1862" b="14904">extra</wd>

<space/>

<wd l="1954" t="14746" r="3010" b="14947">whitespaces</wd>

<space/>

<wd l="3115" t="14794" r="3379" b="14904">are</wd>

<space/>

<wd l="3475" t="14746" r="4248" b="14904">removed</wd>

<space/>

<wd l="4339" t="14746" r="4766" b="14904">from</wd>

<space/>

<wd l="4858" t="14746" r="5126" b="14904">the</wd>

<space/>

<wd l="5218" t="14746" r="5774" b="14947">begin-</wd>

</ln>

<ln l="1416" t="15000" r="5784" b="15202" baseLine="15154" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="15000" r="1810" b="15202">ning</wd>

<space/>

<wd l="1872" t="15000" r="2194" b="15158">and</wd>

<space/>

<wd l="2251" t="15000" r="2573" b="15158">end</wd>

<space/>

<wd l="2630" t="15000" r="2832" b="15158">of</wd>

<space/>

<wd l="2875" t="15005" r="3552" b="15158">Tweets.</wd>

<space/>

<wd l="3624" t="15000" r="3960" b="15158">The</wd>

<space/>

<wd l="4027" t="15000" r="4536" b="15158">initial</wd>

<space/>

<wd l="4608" t="15019" r="5050" b="15202">stage</wd>

<space/>

<wd l="5117" t="15000" r="5318" b="15158">of</wd>

<space/>

<wd l="5357" t="15019" r="5784" b="15158">most</wd>

</ln>

</para>

</column>

<column l="6115" t="1417" r="10517" b="15380">

<para l="6115" t="1464" r="10493" b="4402" alignment="justified" spaceBefore="2" lsp="exactly" lspExact="252" language="en">

<ln l="6115" t="1464" r="10483" b="1666" baseLine="1613" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6115" t="1469" r="6528" b="1622">NLP</wd>

<space/>

<wd l="6605" t="1464" r="7042" b="1622">tasks</wd>

<space/>

<wd l="7118" t="1464" r="7253" b="1622">is</wd>

<space/>

<wd l="7325" t="1464" r="7594" b="1622">the</wd>

<space/>

<wd l="7661" t="1464" r="8789" b="1622">tokenization.</wd>

<space/>

<wd l="8870" t="1464" r="9605" b="1666">Existing</wd>

<space/>

<wd l="9667" t="1464" r="10483" b="1622">tokeniza-</wd>

</ln>

<ln l="6120" t="1714" r="10488" b="1915" baseLine="1867" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="1714" r="6466" b="1872">tion</wd>

<space/>

<wd l="6566" t="1714" r="7310" b="1872">methods</wd>

<space/>

<wd l="7421" t="1762" r="7723" b="1872">can</wd>

<space/>

<wd l="7819" t="1714" r="8534" b="1915">perform</wd>

<space/>

<wd l="8640" t="1714" r="9538" b="1915">accurately</wd>

<space/>

<wd l="9638" t="1714" r="10118" b="1872">when</wd>

<space/>

<wd l="10219" t="1714" r="10488" b="1872">the</wd>

<space/>

</ln>

<ln l="6120" t="1968" r="10493" b="2170" baseLine="2122" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="1987" r="6451" b="2126">text</wd>

<space/>

<wd l="6528" t="1968" r="6667" b="2126">is</wd>

<space/>

<wd l="6744" t="1968" r="7714" b="2170">thoroughly</wd>

<space/>

<wd l="7786" t="1968" r="8294" b="2160">clean,</wd>

<space/>

<wd l="8386" t="1968" r="8779" b="2126">such</wd>

<space/>

<wd l="8856" t="2016" r="9029" b="2126">as</wd>

<space/>

<wd l="9106" t="2016" r="9557" b="2126">news</wd>

<space/>

<wd l="9638" t="1968" r="10094" b="2126">feeds</wd>

<space/>

<wd l="10176" t="1968" r="10493" b="2126">and</wd>

<space/>

</ln>

<ln l="6115" t="2222" r="10488" b="2424" baseLine="2371" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6115" t="2222" r="6566" b="2381">book</wd>

<space/>

<wd l="6653" t="2222" r="7387" b="2381">datasets.</wd>

<space/>

<wd l="7488" t="2227" r="7795" b="2381">For</wd>

<space/>

<wd l="7882" t="2222" r="8669" b="2424">example,</wd>

<space/>

<wd l="8765" t="2222" r="9029" b="2381">the</wd>

<space/>

<wd l="9120" t="2222" r="10488" b="2381">PTB-Tokenizer</wd>

<space/>

</ln>

<ln l="6125" t="2472" r="10488" b="2664" baseLine="2626" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="2472" r="6264" b="2630">is</wd>

<space/>

<wd l="6341" t="2520" r="6437" b="2630">a</wd>

<space/>

<wd l="6509" t="2472" r="6869" b="2664">fast,</wd>

<space/>

<wd l="6950" t="2472" r="8141" b="2664">deterministic,</wd>

<space/>

<wd l="8218" t="2472" r="8539" b="2630">and</wd>

<space/>

<wd l="8602" t="2472" r="9336" b="2630">efficient</wd>

<space/>

<wd l="9398" t="2472" r="10488" b="2630">tokenization</wd>

<space/>

</ln>

<ln l="6120" t="2726" r="10483" b="2918" baseLine="2880" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="2726" r="6826" b="2885">method.</wd>

<space/>

<wd l="6926" t="2726" r="7190" b="2885">On</wd>

<space/>

<wd l="7277" t="2726" r="7546" b="2885">the</wd>

<space/>

<wd l="7637" t="2726" r="8088" b="2885">other</wd>

<space/>

<wd l="8170" t="2726" r="8645" b="2918">hand,</wd>

<space/>

<wd l="8741" t="2726" r="9202" b="2885">UGC</wd>

<space/>

<wd l="9288" t="2746" r="9619" b="2885">text</wd>

<space/>

<wd l="9710" t="2726" r="10483" b="2885">demands</wd>

<space/>

</ln>

<ln l="6130" t="2981" r="10483" b="3182" baseLine="3130" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="2981" r="6730" b="3182">special</wd>

<space/>

<wd l="6902" t="2981" r="7646" b="3139">methods</wd>

<space/>

<wd l="7824" t="2981" r="8136" b="3139">due</wd>

<space/>

<wd l="8304" t="3000" r="8472" b="3139">to</wd>

<space/>

<wd l="8650" t="2981" r="9768" b="3182">irregularities</wd>

<space/>

<wd l="9946" t="2981" r="10114" b="3134">in</wd>

<space/>

<wd l="10286" t="2981" r="10483" b="3139">its</wd>

<space/>

</ln>

<ln l="6120" t="3235" r="10493" b="3437" baseLine="3384" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="3235" r="7181" b="3437">whitespaces</wd>

<space/>

<wd l="7301" t="3235" r="7622" b="3394">and</wd>

<space/>

<wd l="7723" t="3235" r="8808" b="3437">punctuation.</wd>

<space/>

<wd l="8933" t="3235" r="9173" b="3394">As</wd>

<space/>

<wd l="9298" t="3235" r="10162" b="3437">suggested</wd>

<space/>

<wd l="10267" t="3235" r="10493" b="3437">by</wd>

<space/>

</ln>

<ln l="6120" t="3485" r="10488" b="3686" baseLine="3638" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="3490" r="6672" b="3686">Lopez</wd>

<space/>

<wd l="6830" t="3485" r="7498" b="3643">Ludeña</wd>

<space/>

<wd l="7661" t="3504" r="7814" b="3643">et</wd>

<space/>

<wd l="7978" t="3485" r="8179" b="3643">al.</wd>

<space/>

<wd l="8352" t="3485" r="8986" b="3686">(2012),</wd>

<space/>

<wd l="9154" t="3533" r="9408" b="3643">we</wd>

<space/>

<wd l="9576" t="3485" r="10229" b="3686">employ</wd>

<space/>

<wd l="10392" t="3533" r="10488" b="3643">a</wd>

<space/>

</ln>

<ln l="6130" t="3739" r="10488" b="3941" baseLine="3893" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="3739" r="7483" b="3941">straightforward</wd>

<space/>

<wd l="7579" t="3739" r="8035" b="3898">word</wd>

<space/>

<wd l="8141" t="3739" r="9038" b="3941">separating</wd>

<space/>

<wd l="9134" t="3739" r="9845" b="3931">method,</wd>

<space/>

<wd l="9950" t="3739" r="10488" b="3898">which</wd>

<space/>

</ln>

<ln l="6115" t="3994" r="10483" b="4195" baseLine="4142" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6115" t="3994" r="6912" b="4195">performs</wd>

<space/>

<wd l="6984" t="3994" r="8074" b="4152">tokenization</wd>

<space/>

<wd l="8131" t="3994" r="8645" b="4152">based</wd>

<space/>

<wd l="8712" t="4042" r="8928" b="4152">on</wd>

<space/>

<wd l="8995" t="3994" r="9970" b="4195">whitespace</wd>

<space/>

<wd l="10042" t="3994" r="10483" b="4152">char-</wd>

</ln>

<ln l="6125" t="4253" r="6682" b="4402" baseLine="4397" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="4262" r="6682" b="4402">acters.</wd>

</ln>

</para>

<para l="6120" t="4498" r="10512" b="8448" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="6355" t="4498" r="10488" b="4699" baseLine="4651" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="4498" r="6715" b="4656">One</wd>

<space/>

<wd l="6826" t="4498" r="7027" b="4656">of</wd>

<space/>

<wd l="7109" t="4498" r="7378" b="4656">the</wd>

<space/>

<wd l="7483" t="4517" r="7910" b="4656">most</wd>

<space/>

<wd l="8016" t="4498" r="8870" b="4699">important</wd>

<space/>

<wd l="8966" t="4498" r="9672" b="4699">primary</wd>

<space/>

<wd l="9782" t="4517" r="10210" b="4699">steps</wd>

<space/>

<wd l="10320" t="4498" r="10488" b="4651">in</wd>

<space/>

</ln>

<ln l="6120" t="4752" r="10488" b="4954" baseLine="4901" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="4752" r="7291" b="4954">unsupervised</wd>

<space/>

<wd l="7373" t="4752" r="8592" b="4910">normalization</wd>

<space/>

<wd l="8693" t="4771" r="9374" b="4954">systems</wd>

<space/>

<wd l="9470" t="4752" r="9610" b="4910">is</wd>

<space/>

<wd l="9701" t="4771" r="9869" b="4910">to</wd>

<space/>

<wd l="9970" t="4752" r="10488" b="4910">detect</wd>

<space/>

</ln>

<ln l="6125" t="5006" r="10483" b="5208" baseLine="5155" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="5006" r="6595" b="5165">OOV</wd>

<space/>

<wd l="6677" t="5006" r="7258" b="5165">words.</wd>

<space/>

<wd l="7349" t="5006" r="8126" b="5208">Hanspell</wd>

<space/>

<wd l="8213" t="5006" r="8530" b="5165">and</wd>

<space/>

<wd l="8606" t="5006" r="9077" b="5165">GNU</wd>

<space/>

<wd l="9158" t="5006" r="9730" b="5208">Aspell</wd>

<space/>

<wd l="9816" t="5054" r="10075" b="5165">are</wd>

<space/>

<wd l="10157" t="5026" r="10483" b="5165">two</wd>

<space/>

</ln>

<ln l="6120" t="5256" r="10483" b="5458" baseLine="5405" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="5256" r="7171" b="5414">well-known</wd>

<space/>

<wd l="7344" t="5256" r="7747" b="5458">spell</wd>

<space/>

<wd l="7920" t="5256" r="8602" b="5414">checker</wd>

<space/>

<wd l="8770" t="5275" r="9504" b="5458">systems,</wd>

<space/>

<wd l="9677" t="5256" r="10483" b="5448">however,</wd>

<space/>

</ln>

<ln l="6120" t="5510" r="10488" b="5712" baseLine="5659" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="5510" r="6691" b="5712">Aspell</wd>

<space/>

<wd l="6835" t="5510" r="7949" b="5712">performance</wd>

<space/>

<wd l="8098" t="5510" r="8237" b="5669">is</wd>

<space/>

<wd l="8386" t="5558" r="8837" b="5669">more</wd>

<space/>

<wd l="8986" t="5530" r="9710" b="5669">accurate</wd>

<space/>

<wd l="9859" t="5558" r="10075" b="5669">on</wd>

<space/>

<wd l="10219" t="5510" r="10488" b="5669">the</wd>

<space/>

</ln>

<ln l="6120" t="5765" r="10483" b="5966" baseLine="5914" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="5765" r="6600" b="5966">noisy</wd>

<space/>

<wd l="6710" t="5784" r="7042" b="5923">text</wd>

<space/>

<wd l="7157" t="5765" r="7723" b="5966">(Clark</wd>

<space/>

<wd l="7834" t="5765" r="7997" b="5923">&amp;</wd>

<space/>

<wd l="8112" t="5765" r="8659" b="5957">Araki,</wd>

<space/>

<wd l="8784" t="5765" r="9336" b="5966">2011).</wd>

<space/>

<wd l="9466" t="5765" r="9802" b="5923">The</wd>

<space/>

<wd l="9917" t="5765" r="10483" b="5966">Aspell</wd>

<space/>

</ln>

<ln l="6125" t="6014" r="10488" b="6216" baseLine="6163" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="6014" r="7018" b="6216">dictionary</wd>

<space/>

<wd l="7176" t="6014" r="7315" b="6173">is</wd>

<space/>

<wd l="7478" t="6014" r="8146" b="6173">utilized</wd>

<space/>

<wd l="8299" t="6034" r="8467" b="6173">to</wd>

<space/>

<wd l="8630" t="6014" r="9595" b="6216">distinguish</wd>

<space/>

<wd l="9749" t="6014" r="10488" b="6173">between</wd>

<space/>

</ln>

<ln l="6125" t="6269" r="10483" b="6470" baseLine="6418" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="6269" r="6595" b="6427">OOV</wd>

<space/>

<wd l="6706" t="6269" r="7022" b="6427">and</wd>

<space/>

<wd l="7128" t="6269" r="7867" b="6427">standard</wd>

<space/>

<wd l="7963" t="6269" r="8635" b="6470">English</wd>

<space/>

<wd l="8736" t="6269" r="9317" b="6427">words.</wd>

<space/>

<wd l="9432" t="6274" r="9610" b="6422">In</wd>

<space/>

<wd l="9715" t="6269" r="10483" b="6461">addition,</wd>

<space/>

</ln>

<ln l="6120" t="6523" r="10488" b="6725" baseLine="6672" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="6571" r="6374" b="6682">we</wd>

<space/>

<wd l="6480" t="6523" r="6893" b="6682">used</wd>

<space/>

<wd l="6998" t="6571" r="7488" b="6682">seven</wd>

<space/>

<wd l="7594" t="6523" r="8218" b="6725">regular</wd>

<space/>

<wd l="8323" t="6523" r="9264" b="6725">expression</wd>

<space/>

<wd l="9365" t="6523" r="9840" b="6715">rules,</wd>

<space/>

<wd l="9950" t="6523" r="10488" b="6682">which</wd>

<space/>

</ln>

<ln l="6120" t="6773" r="10478" b="6974" baseLine="6922" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="6821" r="6547" b="6931">were</wd>

<space/>

<wd l="6614" t="6773" r="7555" b="6931">introduced</wd>

<space/>

<wd l="7608" t="6773" r="7834" b="6974">by</wd>

<space/>

<wd l="7901" t="6773" r="8506" b="6965">Saloot,</wd>

<space/>

<wd l="8573" t="6773" r="9019" b="6965">Idris,</wd>

<space/>

<wd l="9091" t="6773" r="9413" b="6931">and</wd>

<space/>

<wd l="9466" t="6773" r="9782" b="6931">Aw</wd>

<space/>

<wd l="9850" t="6773" r="10478" b="6974">(2014).</wd>

<space/>

</ln>

<ln l="6125" t="7027" r="10493" b="7229" baseLine="7176" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="7027" r="6509" b="7186">This</wd>

<space/>

<wd l="6638" t="7027" r="7094" b="7229">helps</wd>

<space/>

<wd l="7219" t="7046" r="7387" b="7186">to</wd>

<space/>

<wd l="7522" t="7027" r="8040" b="7186">detect</wd>

<space/>

<wd l="8155" t="7075" r="8741" b="7229">proper</wd>

<space/>

<wd l="8861" t="7075" r="9432" b="7219">nouns,</wd>

<space/>

<wd l="9566" t="7027" r="10046" b="7186">email</wd>

<space/>

<wd l="10176" t="7027" r="10493" b="7186">and</wd>

<space/>

</ln>

<ln l="6120" t="7282" r="10493" b="7483" baseLine="7430" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="7286" r="6562" b="7440">URL</wd>

<space/>

<wd l="6696" t="7282" r="7589" b="7474">addresses,</wd>

<space/>

<wd l="7728" t="7282" r="8376" b="7440">Twitter</wd>

<space/>

<wd l="8515" t="7282" r="9115" b="7483">special</wd>

<space/>

<wd l="9259" t="7282" r="10032" b="7483">symbols,</wd>

<space/>

<wd l="10176" t="7282" r="10493" b="7440">and</wd>

<space/>

</ln>

<ln l="6125" t="7531" r="10483" b="7733" baseLine="7685" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="7531" r="6658" b="7733">digits.</wd>

<space/>

<wd l="6739" t="7531" r="7075" b="7690">The</wd>

<space/>

<wd l="7142" t="7531" r="7910" b="7733">potential</wd>

<space/>

<wd l="7987" t="7579" r="8491" b="7690">errors</wd>

<space/>

<wd l="8568" t="7531" r="8736" b="7685">in</wd>

<space/>

<wd l="8803" t="7531" r="9072" b="7690">the</wd>

<space/>

<wd l="9149" t="7531" r="9619" b="7690">OOV</wd>

<space/>

<wd l="9686" t="7531" r="10147" b="7690">word</wd>

<space/>

<wd l="10214" t="7531" r="10483" b="7690">de-</wd>

</ln>

<ln l="6120" t="7786" r="10512" b="7987" baseLine="7934" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="7786" r="6720" b="7944">tection</wd>

<space/>

<wd l="6806" t="7805" r="7147" b="7987">step</wd>

<space/>

<wd l="7224" t="7786" r="7781" b="7944">would</wd>

<space/>

<wd l="7848" t="7805" r="8131" b="7944">not</wd>

<space/>

<wd l="8208" t="7786" r="8702" b="7944">affect</wd>

<space/>

<wd l="8774" t="7786" r="9043" b="7944">the</wd>

<space/>

<wd l="9115" t="7786" r="10229" b="7987">performance</wd>

<space/>

<wd l="10310" t="7786" r="10512" b="7944">of</wd>

<space/>

</ln>

<ln l="6120" t="8035" r="10488" b="8237" baseLine="8189" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="8035" r="6389" b="8194">the</wd>

<space/>

<wd l="6446" t="8035" r="7670" b="8194">normalization</wd>

<space/>

<wd l="7733" t="8054" r="8338" b="8237">system</wd>

<space/>

<wd l="8400" t="8035" r="8842" b="8194">since</wd>

<space/>

<wd l="8899" t="8035" r="9163" b="8194">the</wd>

<space/>

<wd l="9226" t="8035" r="9960" b="8194">detected</wd>

<space/>

<wd l="10018" t="8035" r="10488" b="8194">OOV</wd>

<space/>

</ln>

<ln l="6120" t="8290" r="9835" b="8448" baseLine="8443" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="8290" r="6581" b="8448">word</wd>

<space/>

<wd l="6629" t="8290" r="6970" b="8448">will</wd>

<space/>

<wd l="7022" t="8290" r="7234" b="8448">be</wd>

<space/>

<wd l="7296" t="8290" r="8054" b="8448">included</wd>

<space/>

<wd l="8107" t="8290" r="8275" b="8443">in</wd>

<space/>

<wd l="8328" t="8290" r="8592" b="8448">the</wd>

<space/>

<wd l="8654" t="8290" r="9494" b="8448">candidate</wd>

<space/>

<wd l="9557" t="8309" r="9835" b="8448">set.</wd>

</ln>

</para>

<para l="6125" t="8750" r="8770" b="8971" alignment="left" spaceBefore="206" lsp="exactly" lspExact="276" language="en">

<ln l="6125" t="8750" r="8770" b="8971" baseLine="8918" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="10">

<wd l="6125" t="8755" r="6235" b="8918">4</wd>

<space/>

<wd l="6562" t="8755" r="7618" b="8923">Candidate</wd>

<space/>

<wd l="7685" t="8755" r="8770" b="8971">generation</wd>

</ln>

</para>

<para l="6120" t="9182" r="10493" b="13440" alignment="justified" spaceBefore="166" lsp="exactly" lspExact="253" language="en">

<ln l="6120" t="9182" r="10493" b="9384" baseLine="9331" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="9187" r="6432" b="9341">For</wd>

<space/>

<wd l="6518" t="9182" r="6917" b="9341">each</wd>

<space/>

<wd l="7003" t="9182" r="7488" b="9384">given</wd>

<space/>

<wd l="7574" t="9182" r="8045" b="9341">OOV</wd>

<space/>

<wd l="8131" t="9182" r="8630" b="9374">word,</wd>

<space/>

<wd l="8726" t="9230" r="8822" b="9341">a</wd>

<space/>

<wd l="8914" t="9202" r="9149" b="9341">set</wd>

<space/>

<wd l="9235" t="9182" r="9437" b="9341">of</wd>

<space/>

<wd l="9499" t="9182" r="10493" b="9341">normalized</wd>

<space/>

</ln>

<ln l="6125" t="9437" r="10483" b="9638" baseLine="9586" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="9437" r="7046" b="9595">candidates</wd>

<space/>

<wd l="7152" t="9437" r="7291" b="9595">is</wd>

<space/>

<wd l="7397" t="9437" r="8256" b="9638">generated</wd>

<space/>

<wd l="8347" t="9437" r="8616" b="9595">via</wd>

<space/>

<wd l="8712" t="9437" r="9082" b="9595">four</wd>

<space/>

<wd l="9178" t="9437" r="9931" b="9595">different</wd>

<space/>

<wd l="10027" t="9437" r="10483" b="9595">mod-</wd>

</ln>

<ln l="6120" t="9686" r="10483" b="9845" baseLine="9840" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="9686" r="6523" b="9845">ules.</wd>

<space/>

<wd l="6624" t="9686" r="6960" b="9845">The</wd>

<space/>

<wd l="7051" t="9686" r="7402" b="9845">first</wd>

<space/>

<wd l="7488" t="9686" r="8146" b="9845">module</wd>

<space/>

<wd l="8242" t="9706" r="8990" b="9845">executes</wd>

<space/>

<wd l="9086" t="9734" r="9182" b="9845">a</wd>

<space/>

<wd l="9269" t="9686" r="9850" b="9845">lexical</wd>

<space/>

<wd l="9946" t="9686" r="10483" b="9845">candi-</wd>

</ln>

<ln l="6125" t="9941" r="10488" b="10142" baseLine="10090" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="9941" r="6490" b="10099">date</wd>

<space/>

<wd l="6576" t="9941" r="7550" b="10142">generation,</wd>

<space/>

<wd l="7642" t="9941" r="8179" b="10099">which</wd>

<space/>

<wd l="8266" t="9941" r="8400" b="10099">is</wd>

<space/>

<wd l="8491" t="9941" r="9494" b="10142">extensively</wd>

<space/>

<wd l="9576" t="9941" r="10238" b="10099">utilized</wd>

<space/>

<wd l="10320" t="9941" r="10488" b="10094">in</wd>

<space/>

</ln>

<ln l="6130" t="10195" r="10488" b="10397" baseLine="10344" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="10195" r="6533" b="10397">spell</wd>

<space/>

<wd l="6658" t="10195" r="7344" b="10354">checker</wd>

<space/>

<wd l="7464" t="10214" r="8194" b="10397">systems.</wd>

<space/>

<wd l="8328" t="10200" r="8458" b="10354">It</wd>

<space/>

<wd l="8582" t="10195" r="9442" b="10354">calculates</wd>

<space/>

<wd l="9566" t="10195" r="10488" b="10354">candidates</wd>

<space/>

</ln>

<ln l="6120" t="10450" r="10483" b="10651" baseLine="10603">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6120" t="10450" r="6686" b="10608">within</wd>

<space/>

<wd l="6773" t="10498" r="6869" b="10608">a</wd>

<space/>

<wd l="6955" t="10450" r="7666" b="10608">distance</wd>

<space/>

<wd l="7757" t="10450" r="7958" b="10608">of</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7992" t="10454" r="8117" b="10603">T</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="8184" t="10450" r="8510" b="10608">edit</wd>

<space/>

<wd l="8597" t="10450" r="9504" b="10651">operations</wd>

<space/>

<wd l="9595" t="10450" r="9797" b="10608">of</wd>

<space/>

<wd l="9854" t="10450" r="10123" b="10608">the</wd>

<space/>

<wd l="10214" t="10450" r="10483" b="10608">de-</wd>

</run>

</ln>

<ln l="6120" t="10704" r="10483" b="10906" baseLine="10858" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="10704" r="6653" b="10862">tected</wd>

<space/>

<wd l="6778" t="10704" r="7248" b="10862">OOV</wd>

<space/>

<wd l="7378" t="10704" r="7958" b="10862">words.</wd>

<space/>

<wd l="8093" t="10709" r="8462" b="10862">Han</wd>

<space/>

<wd l="8587" t="10704" r="8909" b="10862">and</wd>

<space/>

<wd l="9029" t="10704" r="9778" b="10862">Baldwin</wd>

<space/>

<wd l="9907" t="10704" r="10483" b="10906">(2011)</wd>

<space/>

</ln>

<ln l="6130" t="10963" r="10483" b="11165" baseLine="11112">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6130" t="10963" r="6643" b="11122">stated</wd>

<space/>

<wd l="6725" t="10963" r="7056" b="11122">that</wd>

<space/>

<wd l="7147" t="10963" r="7622" b="11122">when</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7685" t="10968" r="7810" b="11117">T</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7877" t="10963" r="8016" b="11122">is</wd>

<space/>

<wd l="8117" t="10963" r="8438" b="11122">less</wd>

<space/>

<wd l="8534" t="10963" r="8904" b="11122">than</wd>

<space/>

<wd l="9005" t="11011" r="9187" b="11122">or</wd>

<space/>

<wd l="9278" t="10963" r="9749" b="11165">equal</wd>

<space/>

<wd l="9840" t="10982" r="10008" b="11122">to</wd>

<space/>

<wd l="10104" t="10982" r="10483" b="11155">two,</wd>

<space/>

</run>

</ln>

<ln l="6120" t="11218" r="10483" b="11419" baseLine="11366" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="11218" r="6389" b="11376">the</wd>

<space/>

<wd l="6480" t="11218" r="6898" b="11376">level</wd>

<space/>

<wd l="6994" t="11218" r="7190" b="11376">of</wd>

<space/>

<wd l="7253" t="11218" r="7738" b="11376">recall</wd>

<space/>

<wd l="7829" t="11218" r="7968" b="11376">is</wd>

<space/>

<wd l="8059" t="11218" r="8448" b="11419">high</wd>

<space/>

<wd l="8534" t="11218" r="9221" b="11419">enough.</wd>

<space/>

<wd l="9322" t="11218" r="9658" b="11376">The</wd>

<space/>

<wd l="9749" t="11218" r="10075" b="11376">edit</wd>

<space/>

<wd l="10162" t="11218" r="10483" b="11376">dis-</wd>

</ln>

<ln l="6120" t="11467" r="10488" b="11669" baseLine="11621" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="11486" r="6586" b="11626">tance</wd>

<space/>

<wd l="6667" t="11467" r="6802" b="11626">is</wd>

<space/>

<wd l="6878" t="11467" r="7147" b="11626">the</wd>

<space/>

<wd l="7224" t="11467" r="7896" b="11626">number</wd>

<space/>

<wd l="7973" t="11467" r="8174" b="11626">of</wd>

<space/>

<wd l="8232" t="11467" r="8880" b="11669">applied</wd>

<space/>

<wd l="8952" t="11467" r="9360" b="11626">edits</wd>

<space/>

<wd l="9442" t="11467" r="9610" b="11621">in</wd>

<space/>

<wd l="9686" t="11467" r="10488" b="11669">changing</wd>

<space/>

</ln>

<ln l="6125" t="11722" r="10478" b="11914" baseLine="11870" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="11770" r="6437" b="11880">one</wd>

<space/>

<wd l="6504" t="11722" r="6960" b="11880">word</wd>

<space/>

<wd l="7013" t="11741" r="7181" b="11880">to</wd>

<space/>

<wd l="7253" t="11722" r="7954" b="11880">another.</wd>

<space/>

<wd l="8026" t="11722" r="8294" b="11875">An</wd>

<space/>

<wd l="8362" t="11722" r="8683" b="11880">edit</wd>

<space/>

<wd l="8750" t="11722" r="9240" b="11880">could</wd>

<space/>

<wd l="9293" t="11722" r="9499" b="11880">be</wd>

<space/>

<wd l="9571" t="11770" r="9667" b="11880">a</wd>

<space/>

<wd l="9730" t="11722" r="10478" b="11914">deletion,</wd>

<space/>

</ln>

<ln l="6120" t="11976" r="10488" b="12178" baseLine="12125" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="11976" r="7310" b="12178">transposition,</wd>

<space/>

<wd l="7411" t="11976" r="8285" b="12168">alteration,</wd>

<space/>

<wd l="8390" t="12024" r="8573" b="12134">or</wd>

<space/>

<wd l="8664" t="11976" r="9475" b="12134">insertion.</wd>

<space/>

<wd l="9586" t="11976" r="10219" b="12134">Studies</wd>

<space/>

<wd l="10320" t="11976" r="10488" b="12130">in</wd>

<space/>

</ln>

<ln l="6130" t="12226" r="10488" b="12427" baseLine="12379" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="12226" r="6821" b="12427">spelling</wd>

<space/>

<wd l="6941" t="12226" r="7829" b="12384">correction</wd>

<space/>

<wd l="7949" t="12226" r="8462" b="12384">found</wd>

<space/>

<wd l="8573" t="12226" r="8904" b="12384">that</wd>

<space/>

<wd l="9029" t="12274" r="9336" b="12384">one</wd>

<space/>

<wd l="9461" t="12226" r="10037" b="12384">lexical</wd>

<space/>

<wd l="10166" t="12226" r="10488" b="12384">edit</wd>

<space/>

</ln>

<ln l="6125" t="12480" r="10483" b="12672" baseLine="12634" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="12480" r="6840" b="12638">distance</wd>

<space/>

<wd l="6936" t="12528" r="7502" b="12638">covers</wd>

<space/>

<wd l="7603" t="12480" r="7987" b="12643">80%</wd>

<space/>

<wd l="8078" t="12499" r="8246" b="12638">to</wd>

<space/>

<wd l="8342" t="12480" r="8736" b="12643">95%</wd>

<space/>

<wd l="8832" t="12480" r="9034" b="12638">of</wd>

<space/>

<wd l="9106" t="12528" r="9662" b="12672">errors,</wd>

<space/>

<wd l="9758" t="12480" r="10075" b="12638">and</wd>

<space/>

<wd l="10157" t="12499" r="10483" b="12638">two</wd>

<space/>

</ln>

<ln l="6125" t="12734" r="10483" b="12898" baseLine="12883" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="12734" r="6706" b="12893">lexical</wd>

<space/>

<wd l="6782" t="12734" r="7109" b="12893">edit</wd>

<space/>

<wd l="7186" t="12734" r="7982" b="12893">distances</wd>

<space/>

<wd l="8059" t="12782" r="8544" b="12893">cover</wd>

<space/>

<wd l="8616" t="12734" r="9010" b="12898">98%</wd>

<space/>

<wd l="9091" t="12734" r="9288" b="12893">of</wd>

<space/>

<wd l="9336" t="12734" r="9816" b="12893">them.</wd>

<space/>

<wd l="9902" t="12734" r="10483" b="12893">There-</wd>

</ln>

<ln l="6125" t="12984" r="10493" b="13176" baseLine="13138" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="12984" r="6528" b="13176">fore,</wd>

<space/>

<wd l="6590" t="12984" r="6970" b="13142">here</wd>

<space/>

<wd l="7032" t="13032" r="7282" b="13142">we</wd>

<space/>

<wd l="7344" t="13032" r="7637" b="13142">use</wd>

<space/>

<wd l="7704" t="12984" r="8280" b="13142">lexical</wd>

<space/>

<wd l="8347" t="12984" r="9211" b="13142">variations</wd>

<space/>

<wd l="9274" t="12984" r="9667" b="13142">with</wd>

<space/>

<wd l="9730" t="12984" r="10051" b="13142">less</wd>

<space/>

<wd l="10114" t="12984" r="10493" b="13142">than</wd>

<space/>

</ln>

<ln l="6125" t="13238" r="8736" b="13440" baseLine="13392" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="13286" r="6307" b="13397">or</wd>

<space/>

<wd l="6365" t="13238" r="6835" b="13440">equal</wd>

<space/>

<wd l="6893" t="13258" r="7061" b="13397">to</wd>

<space/>

<wd l="7118" t="13258" r="7445" b="13397">two</wd>

<space/>

<wd l="7507" t="13238" r="7834" b="13397">edit</wd>

<space/>

<wd l="7896" t="13238" r="8736" b="13397">distances.</wd>

</ln>

</para>

<para l="6120" t="13493" r="10488" b="15274" alignment="justified" spaceBefore="25" spaceAfter="70" fli="216" lsp="exactly" lspExact="262" language="en">

<ln l="6350" t="13493" r="10478" b="13699" baseLine="13646">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="MS Mincho" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6350" t="13502" r="6658" b="13656">For</wd>

<space/>

<wd l="6758" t="13546" r="6854" b="13656">a</wd>

<space/>

<wd l="6950" t="13498" r="7411" b="13656">word</wd>

<space/>

<wd l="7507" t="13498" r="7709" b="13656">of</wd>

<space/>

<wd l="7790" t="13498" r="8338" b="13699">length</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="MS Mincho" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="8400" t="13546" r="8525" b="13656">n</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="MS Mincho" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="8602" t="13498" r="9538" b="13690">characters,</wd>

<space/>

</run>

<wd l="9624" t="13493" r="9979" b="13656"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="MS Mincho" fontFamily="roman" fontPitch="variable" spacing="0">54</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="MS Mincho" fontFamily="roman" fontPitch="variable" spacing="0">n</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="MS Mincho" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="MS Mincho" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="10046" t="13517" r="10186" b="13661">+</wd>

<space/>

<wd l="10253" t="13493" r="10478" b="13656">25</wd>

<space/>

</run>

</ln>

<ln l="6125" t="13752" r="10483" b="13954" baseLine="13901" underlined="none" subsuperscript="none" fontSize="1100" fontFace="MS Mincho" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="13752" r="7306" b="13910">combinations</wd>

<space/>

<wd l="7387" t="13752" r="7728" b="13910">will</wd>

<space/>

<wd l="7805" t="13752" r="8016" b="13910">be</wd>

<space/>

<wd l="8102" t="13752" r="8957" b="13954">generated</wd>

<space/>

<wd l="9034" t="13752" r="9427" b="13910">with</wd>

<space/>

<wd l="9509" t="13800" r="9821" b="13910">one</wd>

<space/>

<wd l="9907" t="13752" r="10483" b="13910">lexical</wd>

<space/>

</ln>

<ln l="6125" t="14002" r="10483" b="14203" baseLine="14155" underlined="none" subsuperscript="none" fontSize="1100" fontFace="MS Mincho" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="14002" r="6451" b="14160">edit</wd>

<space/>

<wd l="6547" t="14002" r="7262" b="14160">distance</wd>

<space/>

<wd l="7354" t="14002" r="7834" b="14203">using</wd>

<space/>

<wd l="7925" t="14002" r="8290" b="14160">four</wd>

<space/>

<wd l="8376" t="14002" r="9235" b="14203">reshaping</wd>

<space/>

<wd l="9331" t="14002" r="10195" b="14203">strategies:</wd>

<space/>

<wd l="10330" t="14002" r="10483" b="14203">1)</wd>

<space/>

</ln>

<ln l="6120" t="14256" r="10483" b="14458" baseLine="14405" underlined="none" subsuperscript="none" fontSize="1100" fontFace="MS Mincho" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="14256" r="6883" b="14414">Deletion</wd>

<space/>

<wd l="6946" t="14275" r="7632" b="14458">strategy</wd>

<space/>

<wd l="7694" t="14256" r="8592" b="14414">eliminates</wd>

<space/>

<wd l="8659" t="14256" r="9542" b="14414">characters</wd>

<space/>

<wd l="9610" t="14256" r="9773" b="14410">in</wd>

<space/>

<wd l="9835" t="14256" r="10046" b="14414">all</wd>

<space/>

<wd l="10104" t="14304" r="10483" b="14458">pos-</wd>

</ln>

<ln l="6130" t="14491" r="10488" b="14731" baseLine="14678">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="MS Mincho" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6130" t="14530" r="6538" b="14688">sible</wd>

<space/>

<wd l="6643" t="14530" r="7435" b="14731">positions</wd>

<space/>

<wd l="7555" t="14530" r="7930" b="14731">(e.g.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="MS Mincho" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="8054" t="14582" r="8347" b="14688">aer</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1200" fontFace="MS Mincho" fontFamily="modern" fontPitch="fixed" spacing="0"><wd l="8453" t="14563" r="8659" b="14645">—</wd>

<space/>

</run>

<wd l="8803" t="14582" r="9029" b="14722"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="MS Mincho" fontFamily="roman" fontPitch="variable" spacing="0">er</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="MS Mincho" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="MS Mincho" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="9149" t="14582" r="9389" b="14722"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="MS Mincho" fontFamily="roman" fontPitch="variable" spacing="0">ar</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="MS Mincho" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="MS Mincho" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="9509" t="14530" r="9835" b="14731"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="MS Mincho" fontFamily="roman" fontPitch="variable" spacing="0">ae</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="MS Mincho" fontFamily="roman" fontPitch="variable" spacing="0">),</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="MS Mincho" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="9950" t="14530" r="10488" b="14688">which</wd>

<space/>

</run>

</ln>

<ln l="6125" t="14798" r="10483" b="15000" baseLine="14947">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="MS Mincho" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6125" t="14818" r="6950" b="15000">generates</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="MS Mincho" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7008" t="14846" r="7133" b="14957">n</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="MS Mincho" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7195" t="14798" r="8424" b="14957">combinations.</wd>

<space/>

<wd l="8510" t="14798" r="8678" b="15000">2)</wd>

<space/>

<wd l="8760" t="14798" r="9965" b="15000">Transposition</wd>

<space/>

<wd l="10046" t="14818" r="10483" b="14957">strat-</wd>

</run>

</ln>

<ln l="6125" t="15034" r="10478" b="15274" baseLine="15221">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="MS Mincho" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6125" t="15120" r="6437" b="15274">egy</wd>

<space/>

<wd l="6514" t="15072" r="7258" b="15230">switches</wd>

<space/>

<wd l="7330" t="15091" r="7656" b="15230">two</wd>

<space/>

<wd l="7728" t="15072" r="8458" b="15274">adjacent</wd>

<space/>

<wd l="8525" t="15072" r="9408" b="15230">characters</wd>

<space/>

<wd l="9485" t="15072" r="9859" b="15274">(e.g.</wd>

<space/>

<wd l="9941" t="15120" r="10210" b="15230">aer</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1200" fontFace="MS Mincho" fontFamily="modern" fontPitch="fixed" spacing="0"><wd l="10272" t="15106" r="10478" b="15187">—</wd>

</run>

</ln>

</para>

</column>

</section>

<dd l="1401" t="15746" r="10517" b="15975">

<para l="5804" t="15792" r="6128" b="15941" alignment="centered" lsp="exactly" lspExact="223" language="en">

<ln l="5870" t="15792" r="6062" b="15941" baseLine="15936" underlined="none" subsuperscript="none" fontSize="950" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="38">

<wd l="5870" t="15792" r="6062" b="15941">21</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4303.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1411" marginTop="462" marginRight="1311" marginBottom="1302" offsetX="-46" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1411" t="462" r="10598" b="15307">

<column l="1411" t="462" r="5794" b="15307">

<para l="1411" t="1469" r="5789" b="5496" alignment="justified" spaceBefore="1033" lsp="exactly" lspExact="253" language="en">

<ln l="1421" t="1469" r="5774" b="1670" baseLine="1618">

<wd l="1421" t="1522" r="1762" b="1661"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">ear</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="1882" t="1469" r="2290" b="1670"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">are</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">),</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="2405" t="1469" r="2942" b="1627">which</wd>

<space/>

<wd l="3058" t="1488" r="3874" b="1670">generates</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1350" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3955" t="1517" r="4080" b="1627">n</wd>

<space/>

<wd l="4147" t="1550" r="4286" b="1570">−</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4363" t="1474" r="4450" b="1622">1</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4546" t="1469" r="5774" b="1627">combinations.</wd>

<space/>

</run>

</ln>

<ln l="1421" t="1718" r="5789" b="1920" baseLine="1872" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="1718" r="1594" b="1920">3)</wd>

<space/>

<wd l="1694" t="1718" r="2587" b="1877">Alteration</wd>

<space/>

<wd l="2688" t="1738" r="3370" b="1920">strategy</wd>

<space/>

<wd l="3470" t="1718" r="4387" b="1877">substitutes</wd>

<space/>

<wd l="4488" t="1718" r="4886" b="1877">each</wd>

<space/>

<wd l="4982" t="1718" r="5789" b="1877">character</wd>

<space/>

</ln>

<ln l="1416" t="1954" r="5779" b="2194" baseLine="2146">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1416" t="1992" r="1810" b="2150">with</wd>

<space/>

<wd l="1896" t="1992" r="2107" b="2150">all</wd>

<space/>

<wd l="2198" t="1992" r="2870" b="2194">English</wd>

<space/>

<wd l="2957" t="1992" r="3778" b="2194">alphabets</wd>

<space/>

<wd l="3869" t="1992" r="4243" b="2194">(e.g.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4344" t="2045" r="4637" b="2150">aer</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="modern" fontPitch="fixed" spacing="0"><wd l="4718" t="2026" r="4925" b="2107">—</wd>

<space/>

</run>

<wd l="5021" t="1992" r="5357" b="2184"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">ber</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="5453" t="2045" r="5779" b="2184"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">cer</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

</ln>

<ln l="1416" t="2266" r="5779" b="2467" baseLine="2414">

<wd l="1416" t="2266" r="1762" b="2458"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">der</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="1838" t="2318" r="2160" b="2458"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">eer</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="2194" t="2266" r="2525" b="2467"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">fer</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="2587" t="2318" r="2938" b="2467"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">ger</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="3010" t="2266" r="3350" b="2458"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">her</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="3422" t="2266" r="3854" b="2467">etc.),</wd>

<space/>

<wd l="3922" t="2266" r="4464" b="2424">which</wd>

<space/>

<wd l="4531" t="2285" r="5352" b="2467">generates</wd>

<space/>

</run>

<wd l="5419" t="2270" r="5779" b="2424"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">26</run>

<run underlined="none" subsuperscript="none" fontSize="1350" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">n</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1350" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

</ln>

<ln l="1421" t="2515" r="5784" b="2717" baseLine="2669" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="2515" r="2650" b="2674">combinations.</wd>

<space/>

<wd l="2722" t="2515" r="2894" b="2717">4)</wd>

<space/>

<wd l="2966" t="2515" r="3744" b="2674">Insertion</wd>

<space/>

<wd l="3816" t="2534" r="4502" b="2717">strategy</wd>

<space/>

<wd l="4555" t="2563" r="5390" b="2717">presumes</wd>

<space/>

<wd l="5453" t="2515" r="5784" b="2674">that</wd>

<space/>

</ln>

<ln l="1421" t="2770" r="5779" b="2971" baseLine="2918" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="2818" r="1517" b="2928">a</wd>

<space/>

<wd l="1598" t="2770" r="2050" b="2928">letter</wd>

<space/>

<wd l="2126" t="2770" r="2261" b="2928">is</wd>

<space/>

<wd l="2347" t="2770" r="3110" b="2971">dropped,</wd>

<space/>

<wd l="3197" t="2770" r="3557" b="2928">thus</wd>

<space/>

<wd l="3643" t="2770" r="4238" b="2971">adding</wd>

<space/>

<wd l="4315" t="2770" r="4526" b="2928">all</wd>

<space/>

<wd l="4608" t="2770" r="4877" b="2928">the</wd>

<space/>

<wd l="4958" t="2770" r="5779" b="2971">alphabets</wd>

<space/>

</ln>

<ln l="1411" t="3005" r="5779" b="3245" baseLine="3192">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1411" t="3043" r="2150" b="3202">between</wd>

<space/>

<wd l="2222" t="3043" r="3106" b="3202">characters</wd>

<space/>

<wd l="3182" t="3043" r="3557" b="3245">(e.g.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3638" t="3096" r="3936" b="3202">aer</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="modern" fontPitch="fixed" spacing="0"><wd l="4003" t="3077" r="4210" b="3158">—</wd>

<space/>

</run>

<wd l="4291" t="3096" r="4738" b="3235"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">aaer</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="4819" t="3043" r="5266" b="3235"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">baer</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="5342" t="3096" r="5779" b="3235"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">caer</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

</ln>

<ln l="1416" t="3307" r="5779" b="3509" baseLine="3461">

<wd l="1416" t="3307" r="1872" b="3499"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">daer</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="1968" t="3360" r="2400" b="3499"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">eaer</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="2453" t="3307" r="2894" b="3509"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">faer</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="2981" t="3360" r="3442" b="3509"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">gaer</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="3533" t="3307" r="3984" b="3499"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">haer</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="4080" t="3307" r="4507" b="3509">etc.),</wd>

<space/>

<wd l="4603" t="3307" r="5141" b="3466">which</wd>

<space/>

<wd l="5227" t="3355" r="5779" b="3509">gener-</wd>

</run>

</ln>

<ln l="1421" t="3562" r="5779" b="3773" baseLine="3715">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1421" t="3586" r="1757" b="3725">ates</wd>

<space/>

</run>

<wd l="1843" t="3562" r="2294" b="3773"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">26(</run>

<run underlined="none" subsuperscript="none" fontSize="1350" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">n</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1350" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2362" t="3586" r="2501" b="3730">+</wd>

<space/>

<wd l="2578" t="3562" r="2760" b="3773">1)</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2851" t="3566" r="4080" b="3725">combinations.</wd>

<space/>

<wd l="4195" t="3566" r="4867" b="3768">Finally,</wd>

<space/>

<wd l="4987" t="3566" r="5410" b="3725">from</wd>

<space/>

<wd l="5510" t="3566" r="5779" b="3725">the</wd>

<space/>

</run>

</ln>

<ln l="1421" t="3821" r="5779" b="4013" baseLine="3970" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="3821" r="2203" b="3979">achieved</wd>

<space/>

<wd l="2309" t="3821" r="3538" b="4013">combinations,</wd>

<space/>

<wd l="3653" t="3821" r="4397" b="3979">standard</wd>

<space/>

<wd l="4493" t="3821" r="5026" b="3979">words</wd>

<space/>

<wd l="5131" t="3821" r="5467" b="3979">will</wd>

<space/>

<wd l="5568" t="3821" r="5779" b="3979">be</wd>

<space/>

</ln>

<ln l="1426" t="4070" r="5774" b="4272" baseLine="4224" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="4070" r="2131" b="4229">selected</wd>

<space/>

<wd l="2232" t="4070" r="2712" b="4272">using</wd>

<space/>

<wd l="2813" t="4070" r="3082" b="4229">the</wd>

<space/>

<wd l="3192" t="4070" r="3758" b="4272">Aspell</wd>

<space/>

<wd l="3874" t="4070" r="4805" b="4272">dictionary.</wd>

<space/>

<wd l="4920" t="4075" r="5774" b="4262">However,</wd>

<space/>

</ln>

<ln l="1416" t="4325" r="5789" b="4526" baseLine="4474" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="4373" r="1906" b="4526">many</wd>

<space/>

<wd l="1992" t="4325" r="2462" b="4483">OOV</wd>

<space/>

<wd l="2549" t="4325" r="3082" b="4483">words</wd>

<space/>

<wd l="3173" t="4325" r="3341" b="4478">in</wd>

<space/>

<wd l="3427" t="4325" r="4075" b="4483">Twitter</wd>

<space/>

<wd l="4162" t="4373" r="4421" b="4483">are</wd>

<space/>

<wd l="4512" t="4325" r="4944" b="4526">quite</wd>

<space/>

<wd l="5035" t="4325" r="5275" b="4483">far</wd>

<space/>

<wd l="5362" t="4325" r="5789" b="4483">from</wd>

<space/>

</ln>

<ln l="1416" t="4579" r="5784" b="4781" baseLine="4728" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="4579" r="1824" b="4738">their</wd>

<space/>

<wd l="1896" t="4598" r="2400" b="4781">target</wd>

<space/>

<wd l="2477" t="4579" r="2645" b="4733">in</wd>

<space/>

<wd l="2722" t="4598" r="3125" b="4738">term</wd>

<space/>

<wd l="3202" t="4579" r="3403" b="4738">of</wd>

<space/>

<wd l="3461" t="4579" r="3787" b="4738">edit</wd>

<space/>

<wd l="3864" t="4579" r="4579" b="4738">distance</wd>

<space/>

<wd l="4661" t="4579" r="5539" b="4781">especially</wd>

<space/>

<wd l="5616" t="4579" r="5784" b="4733">in</wd>

<space/>

</ln>

<ln l="1416" t="4829" r="5779" b="5021" baseLine="4982" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="4848" r="1901" b="4987">terms</wd>

<space/>

<wd l="2011" t="4829" r="2213" b="4987">of</wd>

<space/>

<wd l="2294" t="4829" r="3077" b="4987">deletions</wd>

<space/>

<wd l="3187" t="4829" r="3504" b="4987">and</wd>

<space/>

<wd l="3605" t="4829" r="4752" b="4987">substitutions.</wd>

<space/>

<wd l="4867" t="4829" r="5779" b="5021">Therefore,</wd>

<space/>

</ln>

<ln l="1416" t="5083" r="5789" b="5285" baseLine="5237" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="5131" r="1670" b="5242">we</wd>

<space/>

<wd l="1800" t="5083" r="2654" b="5285">generated</wd>

<space/>

<wd l="2770" t="5131" r="3216" b="5242">more</wd>

<space/>

<wd l="3346" t="5083" r="4262" b="5242">candidates</wd>

<space/>

<wd l="4387" t="5083" r="4661" b="5242">via</wd>

<space/>

<wd l="4776" t="5083" r="5213" b="5242">three</wd>

<space/>

<wd l="5338" t="5083" r="5789" b="5242">other</wd>

<space/>

</ln>

<ln l="1416" t="5338" r="2208" b="5496" baseLine="5486" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="5338" r="2208" b="5496">methods.</wd>

</ln>

</para>

<para l="1411" t="5587" r="5784" b="9835" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="1656" t="5587" r="5784" b="5789" baseLine="5741" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1656" t="5587" r="2299" b="5746">Similar</wd>

<space/>

<wd l="2366" t="5606" r="2534" b="5746">to</wd>

<space/>

<wd l="2606" t="5587" r="2875" b="5746">the</wd>

<space/>

<wd l="2957" t="5587" r="3547" b="5789">speech</wd>

<space/>

<wd l="3619" t="5587" r="4622" b="5789">recognition</wd>

<space/>

<wd l="4704" t="5606" r="5438" b="5789">systems,</wd>

<space/>

<wd l="5515" t="5587" r="5784" b="5746">the</wd>

<space/>

</ln>

<ln l="1426" t="5842" r="5784" b="6043" baseLine="5995" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="5842" r="2035" b="6000">second</wd>

<space/>

<wd l="2155" t="5842" r="2813" b="6000">module</wd>

<space/>

<wd l="2942" t="5861" r="3763" b="6043">generates</wd>

<space/>

<wd l="3893" t="5842" r="4814" b="6000">candidates</wd>

<space/>

<wd l="4934" t="5842" r="5448" b="6000">based</wd>

<space/>

<wd l="5568" t="5890" r="5784" b="6000">on</wd>

<space/>

</ln>

<ln l="1411" t="6096" r="5779" b="6298" baseLine="6245" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1411" t="6096" r="2222" b="6298">phoneme</wd>

<space/>

<wd l="2352" t="6096" r="3000" b="6254">sounds.</wd>

<space/>

<wd l="3130" t="6096" r="3581" b="6288">First,</wd>

<space/>

<wd l="3710" t="6096" r="4570" b="6298">grapheme</wd>

<space/>

<wd l="4690" t="6115" r="4858" b="6254">to</wd>

<space/>

<wd l="4973" t="6096" r="5779" b="6298">phoneme</wd>

<space/>

</ln>

<ln l="1421" t="6350" r="5779" b="6552" baseLine="6499" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="6350" r="2386" b="6509">conversion</wd>

<space/>

<wd l="2458" t="6350" r="2597" b="6509">is</wd>

<space/>

<wd l="2669" t="6350" r="3595" b="6552">performed</wd>

<space/>

<wd l="3662" t="6350" r="4138" b="6552">using</wd>

<space/>

<wd l="4210" t="6350" r="4478" b="6509">the</wd>

<space/>

<wd l="4550" t="6350" r="5779" b="6509">Phonetisaurus</wd>

<space/>

</ln>

<ln l="1416" t="6600" r="5774" b="6802" baseLine="6749" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="6600" r="1757" b="6758">tool</wd>

<space/>

<wd l="1824" t="6600" r="2525" b="6802">(Novak,</wd>

<space/>

<wd l="2587" t="6605" r="3115" b="6802">Yang,</wd>

<space/>

<wd l="3178" t="6600" r="4224" b="6792">Minematsu,</wd>

<space/>

<wd l="4291" t="6600" r="4454" b="6758">&amp;</wd>

<space/>

<wd l="4517" t="6600" r="5150" b="6792">Hirose,</wd>

<space/>

<wd l="5222" t="6600" r="5774" b="6802">2011).</wd>

<space/>

</ln>

<ln l="1416" t="6854" r="5784" b="7056" baseLine="7003" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="6854" r="2650" b="7013">Phonetisaurus</wd>

<space/>

<wd l="2746" t="6854" r="2885" b="7013">is</wd>

<space/>

<wd l="2981" t="6902" r="3187" b="7013">an</wd>

<space/>

<wd l="3278" t="6902" r="4349" b="7056">open-source</wd>

<space/>

<wd l="4430" t="6854" r="5366" b="7056">phonetizer</wd>

<space/>

<wd l="5453" t="6854" r="5784" b="7013">that</wd>

<space/>

</ln>

<ln l="1421" t="7109" r="5779" b="7310" baseLine="7258" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="7109" r="1560" b="7267">is</wd>

<space/>

<wd l="1637" t="7109" r="2419" b="7310">designed</wd>

<space/>

<wd l="2486" t="7109" r="2654" b="7262">in</wd>

<space/>

<wd l="2722" t="7109" r="2990" b="7267">the</wd>

<space/>

<wd l="3062" t="7109" r="3490" b="7267">form</wd>

<space/>

<wd l="3557" t="7109" r="3758" b="7267">of</wd>

<space/>

<wd l="3806" t="7157" r="3902" b="7267">a</wd>

<space/>

<wd l="3970" t="7109" r="4781" b="7310">weighted</wd>

<space/>

<wd l="4848" t="7109" r="5309" b="7267">finite</wd>

<space/>

<wd l="5386" t="7128" r="5779" b="7267">state</wd>

<space/>

</ln>

<ln l="1416" t="7358" r="5784" b="7560" baseLine="7507" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="7358" r="2338" b="7517">transducer</wd>

<space/>

<wd l="2434" t="7358" r="3206" b="7560">(WFST).</wd>

<space/>

<wd l="3317" t="7358" r="3782" b="7517">After</wd>

<space/>

<wd l="3883" t="7358" r="4656" b="7560">selecting</wd>

<space/>

<wd l="4752" t="7358" r="5016" b="7517">the</wd>

<space/>

<wd l="5141" t="7358" r="5333" b="7517">10</wd>

<space/>

<wd l="5424" t="7358" r="5784" b="7517">best</wd>

<space/>

</ln>

<ln l="1411" t="7613" r="5784" b="7814" baseLine="7762" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1411" t="7613" r="2222" b="7814">phoneme</wd>

<space/>

<wd l="2299" t="7661" r="3230" b="7814">sequences,</wd>

<space/>

<wd l="3307" t="7613" r="3427" b="7771">it</wd>

<space/>

<wd l="3494" t="7613" r="3960" b="7771">looks</wd>

<space/>

<wd l="4032" t="7661" r="4248" b="7814">up</wd>

<space/>

<wd l="4320" t="7613" r="4584" b="7771">the</wd>

<space/>

<wd l="4646" t="7613" r="5539" b="7814">phonemes</wd>

<space/>

<wd l="5616" t="7613" r="5784" b="7766">in</wd>

<space/>

</ln>

<ln l="1421" t="7867" r="5784" b="8069" baseLine="8016" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="7915" r="1517" b="8026">a</wd>

<space/>

<wd l="1656" t="7867" r="2774" b="8069">pronouncing</wd>

<space/>

<wd l="2923" t="7867" r="3811" b="8069">dictionary</wd>

<space/>

<wd l="3946" t="7963" r="4066" b="7973">–</wd>

<space/>

<wd l="4210" t="7867" r="5002" b="8069">Carnegie</wd>

<space/>

<wd l="5146" t="7867" r="5784" b="8026">Mellon</wd>

<space/>

</ln>

<ln l="1416" t="8122" r="5779" b="8323" baseLine="8270" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="8122" r="2347" b="8323">University</wd>

<space/>

<wd l="2410" t="8122" r="3048" b="8323">(CMU)</wd>

<space/>

<wd l="3120" t="8122" r="4046" b="8323">dictionary.</wd>

<space/>

<wd l="4123" t="8122" r="4459" b="8280">The</wd>

<space/>

<wd l="4526" t="8122" r="5021" b="8280">CMU</wd>

<space/>

<wd l="5083" t="8122" r="5222" b="8280">is</wd>

<space/>

<wd l="5290" t="8170" r="5386" b="8280">a</wd>

<space/>

<wd l="5443" t="8170" r="5779" b="8280">ma-</wd>

</ln>

<ln l="1421" t="8371" r="5779" b="8573" baseLine="8520" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="8371" r="2712" b="8530">chine-readable</wd>

<space/>

<wd l="2770" t="8371" r="3994" b="8573">pronunciation</wd>

<space/>

<wd l="4061" t="8371" r="4949" b="8573">dictionary</wd>

<space/>

<wd l="5006" t="8371" r="5338" b="8530">that</wd>

<space/>

<wd l="5400" t="8419" r="5779" b="8530">con-</wd>

</ln>

<ln l="1416" t="8626" r="5779" b="8827" baseLine="8774" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="8626" r="1829" b="8784">tains</wd>

<space/>

<wd l="1915" t="8674" r="2304" b="8784">over</wd>

<space/>

<wd l="2405" t="8626" r="3091" b="8818">134,000</wd>

<space/>

<wd l="3173" t="8626" r="3706" b="8784">words</wd>

<space/>

<wd l="3787" t="8626" r="4618" b="8827">including</wd>

<space/>

<wd l="4694" t="8626" r="5165" b="8784">OOV</wd>

<space/>

<wd l="5246" t="8626" r="5779" b="8784">words</wd>

<space/>

</ln>

<ln l="1426" t="8880" r="5784" b="9082" baseLine="9029" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="8880" r="1824" b="9038">such</wd>

<space/>

<wd l="1906" t="8928" r="2083" b="9038">as</wd>

<space/>

<wd l="2160" t="8928" r="2746" b="9082">proper</wd>

<space/>

<wd l="2822" t="8928" r="3341" b="9038">nouns</wd>

<space/>

<wd l="3432" t="8880" r="3749" b="9038">and</wd>

<space/>

<wd l="3830" t="8928" r="4723" b="9082">acronyms.</wd>

<space/>

<wd l="4814" t="8885" r="5179" b="9038">Due</wd>

<space/>

<wd l="5266" t="8899" r="5429" b="9038">to</wd>

<space/>

<wd l="5515" t="8880" r="5784" b="9038">the</wd>

<space/>

</ln>

<ln l="1421" t="9130" r="5784" b="9331" baseLine="9278" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="9130" r="2237" b="9288">existence</wd>

<space/>

<wd l="2299" t="9130" r="2501" b="9288">of</wd>

<space/>

<wd l="2539" t="9178" r="2635" b="9288">a</wd>

<space/>

<wd l="2693" t="9130" r="3130" b="9331">large</wd>

<space/>

<wd l="3187" t="9130" r="3864" b="9288">number</wd>

<space/>

<wd l="3922" t="9130" r="4123" b="9288">of</wd>

<space/>

<wd l="4162" t="9130" r="4632" b="9288">OOV</wd>

<space/>

<wd l="4694" t="9130" r="5227" b="9288">words</wd>

<space/>

<wd l="5290" t="9130" r="5458" b="9283">in</wd>

<space/>

<wd l="5515" t="9130" r="5784" b="9288">the</wd>

<space/>

</ln>

<ln l="1421" t="9384" r="5784" b="9586" baseLine="9533" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="9384" r="1920" b="9542">CMU</wd>

<space/>

<wd l="2016" t="9384" r="2952" b="9586">dictionary,</wd>

<space/>

<wd l="3048" t="9432" r="3302" b="9542">we</wd>

<space/>

<wd l="3403" t="9384" r="3830" b="9542">filter</wd>

<space/>

<wd l="3922" t="9403" r="4200" b="9542">out</wd>

<space/>

<wd l="4291" t="9384" r="4560" b="9542">the</wd>

<space/>

<wd l="4656" t="9384" r="5208" b="9542">OOVs</wd>

<space/>

<wd l="5304" t="9384" r="5784" b="9586">using</wd>

<space/>

</ln>

<ln l="1416" t="9634" r="3307" b="9835" baseLine="9787" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="9634" r="1685" b="9792">the</wd>

<space/>

<wd l="1742" t="9634" r="2314" b="9835">Aspell</wd>

<space/>

<wd l="2376" t="9634" r="3307" b="9835">dictionary.</wd>

</ln>

</para>

<para l="1411" t="9888" r="5789" b="13123" alignment="justified" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="1651" t="9888" r="5774" b="10090" baseLine="10037" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1651" t="9888" r="1987" b="10046">The</wd>

<space/>

<wd l="2126" t="9888" r="2549" b="10046">third</wd>

<space/>

<wd l="2683" t="9888" r="3394" b="10080">module,</wd>

<space/>

<wd l="3542" t="9936" r="3720" b="10046">as</wd>

<space/>

<wd l="3854" t="9888" r="4675" b="10090">proposed</wd>

<space/>

<wd l="4800" t="9888" r="5026" b="10090">by</wd>

<space/>

<wd l="5174" t="9888" r="5774" b="10080">Saloot,</wd>

<space/>

</ln>

<ln l="1421" t="10142" r="5779" b="10344" baseLine="10291" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="10142" r="1867" b="10334">Idris,</wd>

<space/>

<wd l="1934" t="10142" r="2256" b="10301">and</wd>

<space/>

<wd l="2304" t="10142" r="2621" b="10301">Aw</wd>

<space/>

<wd l="2683" t="10142" r="3317" b="10344">(2014),</wd>

<space/>

<wd l="3384" t="10142" r="3518" b="10301">is</wd>

<space/>

<wd l="3586" t="10190" r="3682" b="10301">a</wd>

<space/>

<wd l="3739" t="10142" r="4834" b="10301">combination</wd>

<space/>

<wd l="4896" t="10142" r="5093" b="10301">of</wd>

<space/>

<wd l="5126" t="10142" r="5395" b="10301">the</wd>

<space/>

<wd l="5453" t="10162" r="5779" b="10301">two</wd>

<space/>

</ln>

<ln l="1411" t="10392" r="5779" b="10594" baseLine="10546" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1411" t="10392" r="2174" b="10594">previous</wd>

<space/>

<wd l="2304" t="10392" r="3091" b="10550">modules.</wd>

<space/>

<wd l="3230" t="10392" r="3686" b="10584">First,</wd>

<space/>

<wd l="3821" t="10392" r="3941" b="10550">it</wd>

<space/>

<wd l="4075" t="10392" r="4829" b="10594">lexically</wd>

<space/>

<wd l="4958" t="10411" r="5779" b="10594">generates</wd>

<space/>

</ln>

<ln l="1421" t="10646" r="5784" b="10848" baseLine="10800" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="10646" r="2342" b="10805">candidates</wd>

<space/>

<wd l="2429" t="10646" r="2990" b="10805">within</wd>

<space/>

<wd l="3077" t="10694" r="3389" b="10805">one</wd>

<space/>

<wd l="3475" t="10646" r="3802" b="10805">edit</wd>

<space/>

<wd l="3883" t="10646" r="4598" b="10805">distance</wd>

<space/>

<wd l="4685" t="10646" r="4886" b="10805">of</wd>

<space/>

<wd l="4944" t="10646" r="5213" b="10805">the</wd>

<space/>

<wd l="5299" t="10646" r="5784" b="10848">given</wd>

<space/>

</ln>

<ln l="1421" t="10901" r="5779" b="11093" baseLine="11050" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="10901" r="1891" b="11059">OOV</wd>

<space/>

<wd l="1963" t="10901" r="2467" b="11093">word,</wd>

<space/>

<wd l="2549" t="10901" r="2866" b="11059">and</wd>

<space/>

<wd l="2928" t="10901" r="3307" b="11059">then</wd>

<space/>

<wd l="3389" t="10901" r="3864" b="11059">sends</wd>

<space/>

<wd l="3931" t="10901" r="4200" b="11059">the</wd>

<space/>

<wd l="4277" t="10901" r="5198" b="11059">candidates</wd>

<space/>

<wd l="5270" t="10920" r="5438" b="11059">to</wd>

<space/>

<wd l="5510" t="10901" r="5779" b="11059">the</wd>

<space/>

</ln>

<ln l="1411" t="11150" r="5779" b="11352" baseLine="11304" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1411" t="11150" r="2222" b="11352">phoneme</wd>

<space/>

<wd l="2304" t="11150" r="3005" b="11309">module.</wd>

<space/>

<wd l="3106" t="11150" r="3586" b="11309">Since</wd>

<space/>

<wd l="3672" t="11198" r="3960" b="11309">our</wd>

<space/>

<wd l="4037" t="11150" r="4627" b="11352">testing</wd>

<space/>

<wd l="4709" t="11150" r="5314" b="11309">dataset</wd>

<space/>

<wd l="5400" t="11198" r="5779" b="11309">con-</wd>

</ln>

<ln l="1426" t="11405" r="5779" b="11606" baseLine="11558" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="11405" r="1795" b="11563">sists</wd>

<space/>

<wd l="1882" t="11405" r="2078" b="11563">of</wd>

<space/>

<wd l="2136" t="11405" r="2808" b="11606">English</wd>

<space/>

<wd l="2885" t="11410" r="3514" b="11563">Tweets</wd>

<space/>

<wd l="3590" t="11405" r="4176" b="11606">posted</wd>

<space/>

<wd l="4243" t="11405" r="4469" b="11606">by</wd>

<space/>

<wd l="4550" t="11405" r="5779" b="11606">Singaporeans,</wd>

<space/>

</ln>

<ln l="1421" t="11659" r="5779" b="11861" baseLine="11808" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="11659" r="2765" b="11861">code-switching</wd>

<space/>

<wd l="2875" t="11659" r="3614" b="11818">between</wd>

<space/>

<wd l="3730" t="11659" r="4296" b="11861">Malay</wd>

<space/>

<wd l="4416" t="11659" r="4733" b="11818">and</wd>

<space/>

<wd l="4848" t="11659" r="5520" b="11861">English</wd>

<space/>

<wd l="5640" t="11659" r="5779" b="11818">is</wd>

<space/>

</ln>

<ln l="1421" t="11914" r="5779" b="12115" baseLine="12062" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="11914" r="2150" b="12115">frequent</wd>

<space/>

<wd l="2242" t="11914" r="2410" b="12067">in</wd>

<space/>

<wd l="2501" t="11914" r="2765" b="12072">the</wd>

<space/>

<wd l="2856" t="11933" r="3230" b="12072">text.</wd>

<space/>

<wd l="3336" t="11914" r="4248" b="12106">Therefore,</wd>

<space/>

<wd l="4349" t="11962" r="4642" b="12072">our</wd>

<space/>

<wd l="4733" t="11914" r="5035" b="12072">last</wd>

<space/>

<wd l="5122" t="11914" r="5779" b="12072">module</wd>

<space/>

</ln>

<ln l="1416" t="12163" r="5779" b="12365" baseLine="12317" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="12163" r="2246" b="12322">translates</wd>

<space/>

<wd l="2357" t="12163" r="2827" b="12322">OOV</wd>

<space/>

<wd l="2928" t="12163" r="3461" b="12322">words</wd>

<space/>

<wd l="3571" t="12182" r="3734" b="12322">to</wd>

<space/>

<wd l="3840" t="12163" r="4517" b="12365">English</wd>

<space/>

<wd l="4622" t="12163" r="4848" b="12365">(if</wd>

<space/>

<wd l="4934" t="12163" r="5362" b="12365">any).</wd>

<space/>

<wd l="5477" t="12168" r="5779" b="12322">We</wd>

<space/>

</ln>

<ln l="1426" t="12418" r="5774" b="12619" baseLine="12566" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="12418" r="2194" b="12576">searched</wd>

<space/>

<wd l="2318" t="12418" r="2578" b="12576">for</wd>

<space/>

<wd l="2702" t="12418" r="2966" b="12576">the</wd>

<space/>

<wd l="3096" t="12418" r="3667" b="12576">tokens</wd>

<space/>

<wd l="3802" t="12418" r="3970" b="12571">in</wd>

<space/>

<wd l="4094" t="12418" r="4363" b="12576">the</wd>

<space/>

<wd l="4502" t="12418" r="5021" b="12576">Smith</wd>

<space/>

<wd l="5146" t="12418" r="5774" b="12619">Malay-</wd>

</ln>

<ln l="1416" t="12672" r="5789" b="12874" baseLine="12821" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="12672" r="2088" b="12874">English</wd>

<space/>

<wd l="2198" t="12672" r="3139" b="12874">Dictionary</wd>

<space/>

<wd l="3250" t="12672" r="3845" b="12874">(Smith</wd>

<space/>

<wd l="3955" t="12672" r="4118" b="12830">&amp;</wd>

<space/>

<wd l="4229" t="12672" r="4670" b="12864">Padi,</wd>

<space/>

<wd l="4790" t="12672" r="5352" b="12874">2006),</wd>

<space/>

<wd l="5472" t="12672" r="5789" b="12830">and</wd>

<space/>

</ln>

<ln l="1421" t="12922" r="5126" b="13123" baseLine="13075" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="12922" r="2122" b="13080">inserted</wd>

<space/>

<wd l="2170" t="12922" r="2438" b="13080">the</wd>

<space/>

<wd l="2496" t="12922" r="3331" b="13123">meanings</wd>

<space/>

<wd l="3398" t="12922" r="3566" b="13075">in</wd>

<space/>

<wd l="3619" t="12922" r="3883" b="13080">the</wd>

<space/>

<wd l="3946" t="12922" r="4786" b="13080">candidate</wd>

<space/>

<wd l="4848" t="12941" r="5126" b="13080">set.</wd>

</ln>

</para>

<para l="1411" t="13176" r="5789" b="15149" alignment="justified" spaceBefore="2" spaceAfter="134" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="1651" t="13176" r="5779" b="13378" baseLine="13330" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="13176" r="2146" b="13334">Table</wd>

<space/>

<wd l="2232" t="13176" r="2294" b="13330">1</wd>

<space/>

<wd l="2386" t="13176" r="3096" b="13378">displays</wd>

<space/>

<wd l="3163" t="13176" r="3432" b="13334">the</wd>

<space/>

<wd l="3504" t="13224" r="4176" b="13378">average</wd>

<space/>

<wd l="4243" t="13176" r="4915" b="13334">number</wd>

<space/>

<wd l="4982" t="13176" r="5184" b="13334">of</wd>

<space/>

<wd l="5227" t="13224" r="5779" b="13378">gener-</wd>

</ln>

<ln l="1421" t="13430" r="5784" b="13589" baseLine="13579" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="13430" r="1790" b="13589">ated</wd>

<space/>

<wd l="1853" t="13430" r="2774" b="13589">candidates</wd>

<space/>

<wd l="2846" t="13430" r="3101" b="13589">for</wd>

<space/>

<wd l="3168" t="13430" r="3566" b="13589">each</wd>

<space/>

<wd l="3629" t="13430" r="4334" b="13589">module.</wd>

<space/>

<wd l="4411" t="13430" r="4752" b="13589">The</wd>

<space/>

<wd l="4819" t="13430" r="5390" b="13589">lowest</wd>

<space/>

<wd l="5453" t="13450" r="5784" b="13589">rate</wd>

<space/>

</ln>

<ln l="1421" t="13685" r="5770" b="13886" baseLine="13834" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="13685" r="1560" b="13843">is</wd>

<space/>

<wd l="1651" t="13685" r="2558" b="13843">associated</wd>

<space/>

<wd l="2630" t="13685" r="3024" b="13843">with</wd>

<space/>

<wd l="3101" t="13685" r="3370" b="13843">the</wd>

<space/>

<wd l="3451" t="13685" r="4013" b="13886">Malay</wd>

<space/>

<wd l="4099" t="13685" r="4987" b="13886">dictionary</wd>

<space/>

<wd l="5069" t="13685" r="5770" b="13843">module.</wd>

<space/>

</ln>

<ln l="1421" t="13934" r="5784" b="14136" baseLine="14088" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="13939" r="1819" b="14093">Two</wd>

<space/>

<wd l="1906" t="13934" r="2486" b="14093">lexical</wd>

<space/>

<wd l="2573" t="13934" r="2899" b="14093">edit</wd>

<space/>

<wd l="2981" t="13934" r="3888" b="14136">operations</wd>

<space/>

<wd l="3979" t="13954" r="4718" b="14136">generate</wd>

<space/>

<wd l="4800" t="13934" r="5069" b="14093">the</wd>

<space/>

<wd l="5150" t="13934" r="5784" b="14136">highest</wd>

<space/>

</ln>

<ln l="1416" t="14189" r="5779" b="14390" baseLine="14338" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="14189" r="2088" b="14347">number</wd>

<space/>

<wd l="2174" t="14189" r="2376" b="14347">of</wd>

<space/>

<wd l="2438" t="14189" r="3408" b="14381">candidates,</wd>

<space/>

<wd l="3494" t="14189" r="4032" b="14347">which</wd>

<space/>

<wd l="4114" t="14189" r="4886" b="14347">indicates</wd>

<space/>

<wd l="4973" t="14189" r="5237" b="14347">the</wd>

<space/>

<wd l="5318" t="14189" r="5779" b="14390">high-</wd>

</ln>

<ln l="1421" t="14443" r="5779" b="14645" baseLine="14592" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="14462" r="1666" b="14602">est</wd>

<space/>

<wd l="1723" t="14443" r="2213" b="14602">recall</wd>

<space/>

<wd l="2285" t="14443" r="2602" b="14602">and</wd>

<space/>

<wd l="2664" t="14443" r="3235" b="14602">lowest</wd>

<space/>

<wd l="3293" t="14443" r="4147" b="14645">precision.</wd>

<space/>

<wd l="4229" t="14443" r="4565" b="14602">The</wd>

<space/>

<wd l="4627" t="14443" r="5026" b="14602">rank</wd>

<space/>

<wd l="5088" t="14443" r="5290" b="14602">of</wd>

<space/>

<wd l="5338" t="14491" r="5779" b="14602">com-</wd>

</ln>

<ln l="1411" t="14693" r="5789" b="14894" baseLine="14846" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="14693" r="2136" b="14851">bination</wd>

<space/>

<wd l="2242" t="14693" r="2558" b="14851">and</wd>

<space/>

<wd l="2645" t="14693" r="3451" b="14894">phoneme</wd>

<space/>

<wd l="3552" t="14693" r="4296" b="14851">modules</wd>

<space/>

<wd l="4397" t="14741" r="4661" b="14851">are</wd>

<space/>

<wd l="4766" t="14693" r="5376" b="14851">second</wd>

<space/>

<wd l="5472" t="14693" r="5789" b="14851">and</wd>

<space/>

</ln>

<ln l="1416" t="14947" r="3048" b="15149" baseLine="15101" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="14947" r="1882" b="15139">third,</wd>

<space/>

<wd l="1944" t="14947" r="3048" b="15149">respectively.</wd>

</ln>

</para>

</column>

<column l="6010" t="462" r="10598" b="15307">

<rulerline l="6010" t="1430" r="10598" b="1430" type="single" width="34" color="000000"/>

<table l="6010" t="1434" r="10598" b="3245" alignment="left" spaceBefore="972" spaceAfter="34">

<bottomBorder type="single" width="34"/>

<gridTable>

<gridCol>518</gridCol>

<gridCol>1944</gridCol>

<gridCol>2126</gridCol>

<gridRow>524</gridRow>

<gridRow>509</gridRow>

<gridRow>259</gridRow>

<gridRow>250</gridRow>

<gridRow>269</gridRow>

</gridTable>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="top">

<bottomBorder type="single" width="19"/>

<para l="6110" t="1498" r="6413" b="1685" alignment="left" li="100" spaceAfter="244" lsp="exactly" lspExact="250" language="en">

<ln l="6110" t="1498" r="6413" b="1685" baseLine="1642" italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="15">

<wd l="6110" t="1498" r="6413" b="1685">Io.</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="top">

<bottomBorder type="single" width="19"/>

<para l="6648" t="1493" r="7296" b="1651" alignment="left" li="120" spaceAfter="244" lsp="exactly" lspExact="250" language="en">

<ln l="6648" t="1493" r="7296" b="1651" baseLine="1642" italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6648" t="1493" r="7296" b="1651">module</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="top">

<bottomBorder type="single" width="19"/>

<para l="8573" t="1493" r="10531" b="1906" alignment="left" li="108" lsp="exactly" lspExact="249" language="en">

<ln l="8573" t="1493" r="10531" b="1694" baseLine="1642" italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="8573" t="1493" r="9322" b="1690">Average</wd>

<space/>

<wd l="9485" t="1493" r="10162" b="1651">number</wd>

<space/>

<wd l="10325" t="1493" r="10531" b="1694">of</wd>

<space/>

</ln>

<ln l="8592" t="1747" r="9538" b="1906" baseLine="1896" italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="8592" t="1747" r="9538" b="1906">candidates</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="top">

<topBorder type="single" width="19"/>

<para l="6144" t="2011" r="6274" b="2170" alignment="left" li="72" spaceAfter="240" lsp="exactly" lspExact="253" language="en">

<bullet type="ordered" numChars="3">

</bullet>

<ln l="6144" t="2011" r="6274" b="2165" baseLine="2165" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="48">

<nl orig="true"/>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="top">

<topBorder type="single" width="19"/>

<para l="6653" t="2011" r="8376" b="2170" alignment="left" li="72" lsp="exactly" lspExact="249" language="en">

<tabs position="6653"/>

<tabs alignment="right" position="2170" leaderChar=" "/>

<ln l="6653" t="2011" r="8376" b="2170" baseLine="2165" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6653" t="2016" r="7051" b="2170">Two</wd>

<tab position="7051"/>

<wd l="7258" t="2011" r="7838" b="2170">lexical</wd>

<tab position="7838"/>

<wd l="8050" t="2011" r="8376" b="2170">edit</wd>

</ln>

</para>

<para l="6653" t="2266" r="7368" b="2424" alignment="left" li="72" lsp="exactly" lspExact="244" language="en">

<ln l="6653" t="2266" r="7368" b="2424" baseLine="2419" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6653" t="2266" r="7368" b="2424">distance</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="top">

<topBorder type="single" width="19"/>

<para l="9432" t="2011" r="9643" b="2170" alignment="right" ri="967" spaceAfter="240" lsp="exactly" lspExact="253" language="en">

<ln l="9432" t="2011" r="9643" b="2170" baseLine="2165" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-7">

<wd l="9432" t="2011" r="9643" b="2170">70</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<para l="6125" t="2520" r="6274" b="2678" alignment="left" li="72" lsp="exactly" lspExact="248" language="en">

<bullet type="ordered" numChars="3">

</bullet>

<ln l="6125" t="2520" r="6274" b="2674" baseLine="2669" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="48">

<nl orig="true"/>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<para l="6653" t="2520" r="7800" b="2678" alignment="left" li="120" lsp="exactly" lspExact="248" language="en">

<ln l="6653" t="2520" r="7800" b="2678" baseLine="2669" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6653" t="2520" r="7800" b="2678">Combination</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<para l="9437" t="2520" r="9643" b="2678" alignment="right" ri="967" lsp="exactly" lspExact="248" language="en">

<ln l="9437" t="2520" r="9643" b="2678" baseLine="2669" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-9">

<wd l="9437" t="2520" r="9643" b="2678">50</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<para l="6125" t="2774" r="6274" b="2933" alignment="left" li="72" lsp="exactly" lspExact="240" language="en">

<bullet type="ordered" numChars="3">

</bullet>

<ln l="6125" t="2774" r="6274" b="2933" baseLine="2923" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="48">

<nl orig="true"/>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<para l="6648" t="2774" r="7464" b="2933" alignment="left" li="120" lsp="exactly" lspExact="240" language="en">

<ln l="6648" t="2774" r="7464" b="2933" baseLine="2923" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6648" t="2774" r="7464" b="2933">Phoneme</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<para l="9432" t="2774" r="9643" b="2933" alignment="right" ri="967" lsp="exactly" lspExact="240" language="en">

<ln l="9432" t="2774" r="9643" b="2933" baseLine="2923" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-7">

<wd l="9432" t="2774" r="9643" b="2933">20</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="middle">

<bottomBorder type="single" width="34"/>

<para l="6120" t="3024" r="6274" b="3182" alignment="left" li="72" spaceAfter="5" lsp="exactly" lspExact="253" language="en">

<bullet type="ordered" numChars="3">

</bullet>

<ln l="6120" t="3024" r="6274" b="3178" baseLine="3178" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="48">

<nl orig="true"/>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="middle">

<bottomBorder type="single" width="34"/>

<para l="6648" t="3024" r="8160" b="3226" alignment="left" li="120" spaceAfter="5" lsp="exactly" lspExact="253" language="en">

<ln l="6648" t="3024" r="8160" b="3226" baseLine="3178" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6648" t="3024" r="7214" b="3226">Malay</wd>

<space/>

<wd l="7272" t="3024" r="8160" b="3226">dictionary</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="middle">

<bottomBorder type="single" width="34"/>

<para l="9485" t="3024" r="9576" b="3182" alignment="right" ri="967" spaceAfter="5" lsp="exactly" lspExact="253" language="en">

<ln l="9485" t="3024" r="9576" b="3182" baseLine="3178" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="9485" t="3024" r="9576" b="3182">3</wd>

</ln>

</para>

</cell>

</table>

<para l="6125" t="3307" r="10483" b="3720" alignment="justified" li="72" ri="72" lsp="exactly" lspExact="244" language="en">

<ln l="6125" t="3307" r="10483" b="3509" baseLine="3461" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="3307" r="6624" b="3466">Table</wd>

<space/>

<wd l="6730" t="3307" r="6859" b="3466">1:</wd>

<space/>

<wd l="6965" t="3307" r="7301" b="3466">The</wd>

<space/>

<wd l="7387" t="3355" r="8064" b="3509">average</wd>

<space/>

<wd l="8150" t="3307" r="8827" b="3466">number</wd>

<space/>

<wd l="8909" t="3307" r="9110" b="3466">of</wd>

<space/>

<wd l="9178" t="3307" r="10032" b="3509">generated</wd>

<space/>

<wd l="10114" t="3355" r="10483" b="3466">can-</wd>

</ln>

<ln l="6125" t="3562" r="8597" b="3720" baseLine="3710" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="3562" r="6744" b="3720">didates</wd>

<space/>

<wd l="6806" t="3562" r="7061" b="3720">for</wd>

<space/>

<wd l="7118" t="3562" r="7450" b="3720">five</wd>

<space/>

<wd l="7512" t="3562" r="7968" b="3720">letter</wd>

<space/>

<wd l="8021" t="3562" r="8597" b="3720">words.</wd>

</ln>

</para>

<para l="6125" t="4022" r="8554" b="4195" alignment="left" li="72" ri="72" spaceBefore="208" lsp="exactly" lspExact="273" language="en">

<ln l="6125" t="4022" r="8554" b="4195" baseLine="4186" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="11">

<wd l="6125" t="4027" r="6235" b="4195">5</wd>

<space/>

<wd l="6562" t="4027" r="7618" b="4195">Candidate</wd>

<space/>

<wd l="7690" t="4027" r="8554" b="4195">selection</wd>

</ln>

</para>

<para l="6120" t="4450" r="10493" b="9206" alignment="justified" li="72" ri="72" spaceBefore="156" lsp="exactly" lspExact="253" language="en">

<ln l="6125" t="4450" r="10493" b="4651" baseLine="4603" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6125" t="4450" r="6466" b="4608">The</wd>

<space/>

<wd l="6523" t="4450" r="6960" b="4608">main</wd>

<space/>

<wd l="7022" t="4450" r="8098" b="4608">contribution</wd>

<space/>

<wd l="8155" t="4450" r="8352" b="4608">of</wd>

<space/>

<wd l="8390" t="4450" r="8702" b="4608">this</wd>

<space/>

<wd l="8765" t="4450" r="9221" b="4608">work</wd>

<space/>

<wd l="9274" t="4450" r="9413" b="4608">is</wd>

<space/>

<wd l="9475" t="4469" r="9643" b="4608">to</wd>

<space/>

<wd l="9696" t="4469" r="10339" b="4651">present</wd>

<space/>

<wd l="10397" t="4498" r="10493" b="4608">a</wd>

<space/>

</ln>

<ln l="6120" t="4704" r="10488" b="4862" baseLine="4853" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6120" t="4704" r="6605" b="4862">novel</wd>

<space/>

<wd l="6686" t="4704" r="7522" b="4862">candidate</wd>

<space/>

<wd l="7603" t="4704" r="8376" b="4862">selection</wd>

<space/>

<wd l="8448" t="4704" r="9149" b="4862">method.</wd>

<space/>

<wd l="9235" t="4704" r="9576" b="4862">The</wd>

<space/>

<wd l="9653" t="4704" r="10488" b="4862">candidate</wd>

<space/>

</ln>

<ln l="6130" t="4958" r="10493" b="5160" baseLine="5107" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6130" t="4958" r="6907" b="5117">selection</wd>

<space/>

<wd l="6974" t="4978" r="7411" b="5160">stage</wd>

<space/>

<wd l="7478" t="4958" r="8165" b="5117">consists</wd>

<space/>

<wd l="8232" t="4958" r="8434" b="5117">of</wd>

<space/>

<wd l="8467" t="4978" r="8794" b="5117">two</wd>

<space/>

<wd l="8866" t="4978" r="9341" b="5160">steps:</wd>

<space/>

<wd l="9442" t="4958" r="9595" b="5160">1)</wd>

<space/>

<wd l="9662" t="4958" r="10493" b="5160">assigning</wd>

<space/>

</ln>

<ln l="6125" t="5208" r="10493" b="5410" baseLine="5362" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6125" t="5256" r="6221" b="5366">a</wd>

<space/>

<wd l="6288" t="5208" r="6902" b="5410">variety</wd>

<space/>

<wd l="6974" t="5208" r="7176" b="5366">of</wd>

<space/>

<wd l="7219" t="5208" r="8189" b="5410">probability</wd>

<space/>

<wd l="8266" t="5256" r="8803" b="5366">scores</wd>

<space/>

<wd l="8875" t="5227" r="9043" b="5366">to</wd>

<space/>

<wd l="9125" t="5208" r="10094" b="5400">candidates,</wd>

<space/>

<wd l="10176" t="5208" r="10493" b="5366">and</wd>

<space/>

</ln>

<ln l="6125" t="5462" r="10488" b="5664" baseLine="5611" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6125" t="5462" r="6298" b="5664">2)</wd>

<space/>

<wd l="6370" t="5462" r="7320" b="5664">integrating</wd>

<space/>

<wd l="7378" t="5462" r="8347" b="5664">probability</wd>

<space/>

<wd l="8414" t="5510" r="8952" b="5621">scores</wd>

<space/>

<wd l="9014" t="5482" r="9182" b="5621">to</wd>

<space/>

<wd l="9254" t="5462" r="9744" b="5621">select</wd>

<space/>

<wd l="9806" t="5462" r="10070" b="5621">the</wd>

<space/>

<wd l="10128" t="5462" r="10488" b="5621">best</wd>

<space/>

</ln>

<ln l="6125" t="5717" r="10483" b="5875" baseLine="5866" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6125" t="5717" r="7008" b="5875">candidate.</wd>

<space/>

<wd l="7147" t="5717" r="7483" b="5875">Our</wd>

<space/>

<wd l="7608" t="5717" r="8443" b="5875">candidate</wd>

<space/>

<wd l="8573" t="5717" r="9346" b="5875">selection</wd>

<space/>

<wd l="9466" t="5717" r="10128" b="5875">method</wd>

<space/>

<wd l="10243" t="5765" r="10483" b="5875">re-</wd>

</ln>

<ln l="6125" t="5966" r="10488" b="6168" baseLine="6120" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6125" t="5966" r="6658" b="6168">quires</wd>

<space/>

<wd l="6730" t="6014" r="6826" b="6125">a</wd>

<space/>

<wd l="6888" t="5966" r="7570" b="6168">training</wd>

<space/>

<wd l="7637" t="5966" r="8290" b="6125">dataset.</wd>

<space/>

<wd l="8366" t="5966" r="8702" b="6125">The</wd>

<space/>

<wd l="8770" t="5966" r="9456" b="6168">training</wd>

<space/>

<wd l="9523" t="5966" r="9840" b="6125">and</wd>

<space/>

<wd l="9902" t="5966" r="10488" b="6168">testing</wd>

<space/>

</ln>

<ln l="6125" t="6221" r="10493" b="6422" baseLine="6374" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6125" t="6221" r="6816" b="6379">datasets</wd>

<space/>

<wd l="6902" t="6269" r="7166" b="6379">are</wd>

<space/>

<wd l="7248" t="6221" r="8045" b="6379">collected</wd>

<space/>

<wd l="8122" t="6221" r="8544" b="6379">from</wd>

<space/>

<wd l="8630" t="6269" r="8827" b="6379">an</wd>

<space/>

<wd l="8914" t="6221" r="9739" b="6379">extensive</wd>

<space/>

<wd l="9821" t="6221" r="10493" b="6422">English</wd>

<space/>

</ln>

<ln l="6125" t="6475" r="10483" b="6677" baseLine="6624" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6125" t="6475" r="6773" b="6634">Twitter</wd>

<space/>

<wd l="6869" t="6523" r="7445" b="6677">corpus</wd>

<space/>

<wd l="7536" t="6475" r="8122" b="6677">posted</wd>

<space/>

<wd l="8203" t="6475" r="8429" b="6677">by</wd>

<space/>

<wd l="8530" t="6475" r="9701" b="6677">Singaporeans</wd>

<space/>

<wd l="9802" t="6475" r="10483" b="6677">(Saloot,</wd>

<space/>

</ln>

<ln l="6125" t="6725" r="10483" b="6926" baseLine="6878" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6125" t="6725" r="6571" b="6917">Idris,</wd>

<space/>

<wd l="6648" t="6725" r="7013" b="6917">Aw,</wd>

<space/>

<wd l="7094" t="6725" r="7258" b="6883">&amp;</wd>

<space/>

<wd l="7334" t="6725" r="8520" b="6917">Thorleuchter,</wd>

<space/>

<wd l="8597" t="6725" r="9154" b="6926">2014).</wd>

<space/>

<wd l="9235" t="6725" r="9744" b="6883">Three</wd>

<space/>

<wd l="9821" t="6725" r="10483" b="6926">linguis-</wd>

</ln>

<ln l="6120" t="6979" r="10483" b="7181" baseLine="7133" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6120" t="6979" r="6336" b="7138">tic</wd>

<space/>

<wd l="6451" t="6998" r="7075" b="7181">experts</wd>

<space/>

<wd l="7186" t="6979" r="8006" b="7181">manually</wd>

<space/>

<wd l="8107" t="6979" r="9101" b="7138">normalized</wd>

<space/>

<wd l="9206" t="6979" r="9691" b="7171">7,000</wd>

<space/>

<wd l="9802" t="6984" r="10483" b="7171">Tweets,</wd>

<space/>

</ln>

<ln l="6125" t="7234" r="10493" b="7435" baseLine="7382" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6125" t="7234" r="6614" b="7392">while</wd>

<space/>

<wd l="6691" t="7234" r="7171" b="7435">using</wd>

<space/>

<wd l="7253" t="7234" r="8947" b="7392">inter-normalization</wd>

<space/>

<wd l="9029" t="7253" r="9941" b="7435">agreement</wd>

<space/>

<wd l="10022" t="7282" r="10200" b="7392">as</wd>

<space/>

<wd l="10286" t="7282" r="10493" b="7392">an</wd>

<space/>

</ln>

<ln l="6130" t="7488" r="10488" b="7690" baseLine="7637" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6130" t="7488" r="6950" b="7646">indicator.</wd>

<space/>

<wd l="7027" t="7488" r="7363" b="7646">The</wd>

<space/>

<wd l="7435" t="7507" r="8059" b="7690">experts</wd>

<space/>

<wd l="8126" t="7536" r="8554" b="7646">were</wd>

<space/>

<wd l="8621" t="7488" r="9490" b="7646">instructed</wd>

<space/>

<wd l="9547" t="7507" r="9715" b="7646">to</wd>

<space/>

<wd l="9778" t="7488" r="10488" b="7690">produce</wd>

<space/>

</ln>

<ln l="6130" t="7738" r="10483" b="7939" baseLine="7891" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6130" t="7786" r="6226" b="7896">a</wd>

<space/>

<wd l="6288" t="7757" r="6619" b="7896">text</wd>

<space/>

<wd l="6686" t="7738" r="7018" b="7896">that</wd>

<space/>

<wd l="7090" t="7738" r="7229" b="7896">is</wd>

<space/>

<wd l="7301" t="7786" r="7478" b="7896">as</wd>

<space/>

<wd l="7550" t="7738" r="7997" b="7896">close</wd>

<space/>

<wd l="8069" t="7757" r="8232" b="7896">to</wd>

<space/>

<wd l="8314" t="7738" r="9058" b="7896">standard</wd>

<space/>

<wd l="9120" t="7738" r="9792" b="7939">English</wd>

<space/>

<wd l="9864" t="7786" r="10037" b="7896">as</wd>

<space/>

<wd l="10104" t="7786" r="10483" b="7939">pos-</wd>

</ln>

<ln l="6134" t="7992" r="10478" b="8194" baseLine="8141" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6134" t="7992" r="6590" b="8184">sible,</wd>

<space/>

<wd l="6648" t="7992" r="6936" b="8150">but</wd>

<space/>

<wd l="6998" t="7992" r="7541" b="8150">leaves</wd>

<space/>

<wd l="7603" t="7992" r="7872" b="8150">the</wd>

<space/>

<wd l="7934" t="7992" r="8582" b="8150">Twitter</wd>

<space/>

<wd l="8645" t="7992" r="9245" b="8194">special</wd>

<space/>

<wd l="9317" t="7992" r="10032" b="8194">symbols</wd>

<space/>

<wd l="10104" t="7992" r="10478" b="8194">(e.g.</wd>

<space/>

</ln>

<ln l="6125" t="8246" r="10488" b="8448" baseLine="8395" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6125" t="8246" r="6667" b="8448">#topic</wd>

<space/>

<wd l="6778" t="8246" r="7099" b="8405">and</wd>

<space/>

<wd l="7205" t="8246" r="8304" b="8448">@username)</wd>

<space/>

<wd l="8414" t="8294" r="8587" b="8405">as</wd>

<space/>

<wd l="8698" t="8246" r="8885" b="8405">is.</wd>

<space/>

<wd l="9000" t="8246" r="9336" b="8405">The</wd>

<space/>

<wd l="9442" t="8246" r="10051" b="8405">dataset</wd>

<space/>

<wd l="10147" t="8294" r="10488" b="8405">was</wd>

<space/>

</ln>

<ln l="6134" t="8496" r="10488" b="8698" baseLine="8645" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6134" t="8496" r="6504" b="8698">split</wd>

<space/>

<wd l="6571" t="8496" r="6902" b="8654">into</wd>

<space/>

<wd l="6970" t="8515" r="7296" b="8654">two</wd>

<space/>

<wd l="7358" t="8515" r="7834" b="8698">parts:</wd>

<space/>

<wd l="7925" t="8496" r="8405" b="8688">5,000</wd>

<space/>

<wd l="8472" t="8544" r="9298" b="8698">messages</wd>

<space/>

<wd l="9370" t="8496" r="9624" b="8654">for</wd>

<space/>

<wd l="9682" t="8496" r="9950" b="8654">the</wd>

<space/>

<wd l="10013" t="8496" r="10488" b="8654">train-</wd>

</ln>

<ln l="6130" t="8750" r="10493" b="8952" baseLine="8899" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6130" t="8750" r="6408" b="8952">ing</wd>

<space/>

<wd l="6509" t="8750" r="7066" b="8952">phase,</wd>

<space/>

<wd l="7186" t="8750" r="7507" b="8909">and</wd>

<space/>

<wd l="7618" t="8750" r="8102" b="8942">2,000</wd>

<space/>

<wd l="8218" t="8798" r="9043" b="8952">messages</wd>

<space/>

<wd l="9163" t="8750" r="9418" b="8909">for</wd>

<space/>

<wd l="9523" t="8750" r="9792" b="8909">the</wd>

<space/>

<wd l="9902" t="8750" r="10493" b="8952">testing</wd>

<space/>

</ln>

<ln l="6120" t="9005" r="6672" b="9206" baseLine="9154" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6120" t="9005" r="6672" b="9206">phase.</wd>

</ln>

</para>

<para l="6130" t="9422" r="9758" b="9624" alignment="left" li="72" ri="72" spaceBefore="168" lsp="exactly" lspExact="254" language="en">

<ln l="6130" t="9422" r="9758" b="9624" baseLine="9571" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="6">

<wd l="6130" t="9422" r="6379" b="9581">5.1</wd>

<space/>

<wd l="6706" t="9422" r="7786" b="9581">Calculation</wd>

<space/>

<wd l="7848" t="9422" r="8045" b="9581">of</wd>

<space/>

<wd l="8083" t="9422" r="9134" b="9624">probability</wd>

<space/>

<wd l="9192" t="9470" r="9758" b="9581">scores</wd>

</ln>

</para>

<para l="6125" t="9792" r="10493" b="10752" alignment="justified" li="72" ri="72" spaceBefore="111" lsp="exactly" lspExact="252" language="en">

<ln l="6130" t="9792" r="10483" b="9984" baseLine="9941" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="9797" r="6302" b="9946">In</wd>

<space/>

<wd l="6408" t="9792" r="6874" b="9950">order</wd>

<space/>

<wd l="6965" t="9811" r="7133" b="9950">to</wd>

<space/>

<wd l="7243" t="9792" r="7738" b="9950">select</wd>

<space/>

<wd l="7834" t="9792" r="8102" b="9950">the</wd>

<space/>

<wd l="8203" t="9811" r="8626" b="9950">most</wd>

<space/>

<wd l="8736" t="9792" r="9408" b="9950">suitable</wd>

<space/>

<wd l="9509" t="9792" r="10483" b="9984">candidates,</wd>

<space/>

</ln>

<ln l="6125" t="10042" r="10488" b="10243" baseLine="10190" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="10090" r="6379" b="10200">we</wd>

<space/>

<wd l="6470" t="10042" r="7243" b="10200">calculate</wd>

<space/>

<wd l="7330" t="10042" r="7738" b="10200">their</wd>

<space/>

<wd l="7824" t="10042" r="8803" b="10200">conditional</wd>

<space/>

<wd l="8890" t="10042" r="9859" b="10243">probability</wd>

<space/>

<wd l="9950" t="10090" r="10488" b="10200">scores</wd>

<space/>

</ln>

<ln l="6125" t="10296" r="10493" b="10498" baseLine="10445" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="10296" r="6648" b="10498">using,</wd>

<space/>

<wd l="6758" t="10296" r="7627" b="10498">positional</wd>

<space/>

<wd l="7738" t="10296" r="8549" b="10498">indexing,</wd>

<space/>

<wd l="8669" t="10344" r="8765" b="10454">a</wd>

<space/>

<wd l="8870" t="10296" r="10493" b="10498">dependency-based</wd>

<space/>

</ln>

<ln l="6130" t="10550" r="10282" b="10752" baseLine="10699" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="10550" r="7008" b="10752">frequency</wd>

<space/>

<wd l="7066" t="10550" r="7718" b="10742">feature,</wd>

<space/>

<wd l="7786" t="10550" r="8102" b="10709">and</wd>

<space/>

<wd l="8160" t="10598" r="8256" b="10709">a</wd>

<space/>

<wd l="8309" t="10550" r="9096" b="10752">language</wd>

<space/>

<wd l="9154" t="10550" r="9701" b="10709">model</wd>

<space/>

<wd l="9763" t="10550" r="10282" b="10752">(LM).</wd>

</ln>

</para>

<para l="6120" t="10800" r="10512" b="15259" alignment="justified" li="72" ri="72" fli="288" lsp="exactly" lspExact="252" language="en">

<ln l="6355" t="10800" r="10483" b="11002" baseLine="10949" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6355" t="10800" r="7080" b="11002">Inspired</wd>

<space/>

<wd l="7157" t="10800" r="7382" b="11002">by</wd>

<space/>

<wd l="7464" t="10800" r="7925" b="10958">work</wd>

<space/>

<wd l="8006" t="10848" r="8222" b="10958">on</wd>

<space/>

<wd l="8314" t="10848" r="8410" b="10958">a</wd>

<space/>

<wd l="8496" t="10800" r="9720" b="10958">normalization</wd>

<space/>

<wd l="9811" t="10800" r="10483" b="10958">diction-</wd>

</ln>

<ln l="6130" t="11054" r="10488" b="11256" baseLine="11203" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6130" t="11102" r="6408" b="11256">ary</wd>

<space/>

<wd l="6480" t="11054" r="6912" b="11256">(Han</wd>

<space/>

<wd l="6994" t="11074" r="7152" b="11213">et</wd>

<space/>

<wd l="7229" t="11054" r="7488" b="11246">al.,</wd>

<space/>

<wd l="7570" t="11054" r="8126" b="11256">2012),</wd>

<space/>

<wd l="8208" t="11054" r="8472" b="11213">the</wd>

<space/>

<wd l="8554" t="11054" r="8904" b="11213">first</wd>

<space/>

<wd l="8976" t="11054" r="9643" b="11213">method</wd>

<space/>

<wd l="9710" t="11074" r="9878" b="11213">to</wd>

<space/>

<wd l="9960" t="11054" r="10488" b="11213">calcu-</wd>

</ln>

<ln l="6130" t="11309" r="10488" b="11510" baseLine="11458" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6130" t="11309" r="6442" b="11467">late</wd>

<space/>

<wd l="6514" t="11309" r="6778" b="11467">the</wd>

<space/>

<wd l="6845" t="11309" r="7814" b="11510">probability</wd>

<space/>

<wd l="7891" t="11357" r="8347" b="11467">score</wd>

<space/>

<wd l="8424" t="11309" r="8621" b="11467">of</wd>

<space/>

<wd l="8669" t="11309" r="8938" b="11467">the</wd>

<space/>

<wd l="9014" t="11309" r="9931" b="11467">candidates</wd>

<space/>

<wd l="10008" t="11309" r="10147" b="11467">is</wd>

<space/>

<wd l="10219" t="11309" r="10488" b="11467">the</wd>

<space/>

</ln>

<ln l="6120" t="11558" r="10483" b="11760" baseLine="11707" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6120" t="11558" r="6989" b="11760">positional</wd>

<space/>

<wd l="7080" t="11558" r="7891" b="11760">indexing,</wd>

<space/>

<wd l="7982" t="11558" r="8520" b="11717">which</wd>

<space/>

<wd l="8602" t="11558" r="8741" b="11717">is</wd>

<space/>

<wd l="8827" t="11558" r="9427" b="11760">widely</wd>

<space/>

<wd l="9509" t="11558" r="9917" b="11717">used</wd>

<space/>

<wd l="9998" t="11558" r="10166" b="11712">in</wd>

<space/>

<wd l="10248" t="11558" r="10483" b="11712">in-</wd>

</ln>

<ln l="6130" t="11813" r="10488" b="12014" baseLine="11962" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6130" t="11813" r="6994" b="11971">formation</wd>

<space/>

<wd l="7104" t="11813" r="7834" b="11971">retrieval</wd>

<space/>

<wd l="7958" t="11832" r="8688" b="12014">systems.</wd>

<space/>

<wd l="8818" t="11813" r="9154" b="11971">The</wd>

<space/>

<wd l="9264" t="11813" r="10133" b="12014">positional</wd>

<space/>

<wd l="10253" t="11813" r="10488" b="11966">in-</wd>

</ln>

<ln l="6130" t="12062" r="10493" b="12264" baseLine="12216" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6130" t="12062" r="6725" b="12264">dexing</wd>

<space/>

<wd l="6840" t="12062" r="7286" b="12221">deals</wd>

<space/>

<wd l="7402" t="12062" r="7795" b="12221">with</wd>

<space/>

<wd l="7901" t="12062" r="8770" b="12264">positional</wd>

<space/>

<wd l="8890" t="12062" r="9677" b="12221">locations</wd>

<space/>

<wd l="9797" t="12062" r="9998" b="12221">of</wd>

<space/>

<wd l="10085" t="12082" r="10493" b="12221">term</wd>

<space/>

</ln>

<ln l="6130" t="12317" r="10488" b="12518" baseLine="12470" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6130" t="12365" r="7171" b="12475">occurrences</wd>

<space/>

<wd l="7253" t="12317" r="7771" b="12475">inside</wd>

<space/>

<wd l="7853" t="12317" r="8846" b="12475">documents.</wd>

<space/>

<wd l="8933" t="12322" r="9168" b="12475">To</wd>

<space/>

<wd l="9250" t="12317" r="9955" b="12518">compile</wd>

<space/>

<wd l="10032" t="12365" r="10128" b="12475">a</wd>

<space/>

<wd l="10195" t="12365" r="10488" b="12518">po-</wd>

</ln>

<ln l="6134" t="12571" r="10493" b="12763" baseLine="12720" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6134" t="12571" r="6768" b="12730">sitional</wd>

<space/>

<wd l="6902" t="12571" r="7387" b="12730">index</wd>

<space/>

<wd l="7517" t="12571" r="8174" b="12763">dataset,</wd>

<space/>

<wd l="8309" t="12619" r="8405" b="12730">a</wd>

<space/>

<wd l="8525" t="12571" r="9192" b="12730">method</wd>

<space/>

<wd l="9317" t="12571" r="10200" b="12730">illustrated</wd>

<space/>

<wd l="10325" t="12571" r="10493" b="12725">in</wd>

<space/>

</ln>

<ln l="6125" t="12821" r="10493" b="13022" baseLine="12974" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6125" t="12821" r="6922" b="13022">Manning</wd>

<space/>

<wd l="7018" t="12821" r="7339" b="12979">and</wd>

<space/>

<wd l="7430" t="12821" r="8309" b="13022">Raghavan</wd>

<space/>

<wd l="8410" t="12821" r="8986" b="13022">(2009)</wd>

<space/>

<wd l="9096" t="12821" r="9230" b="12979">is</wd>

<space/>

<wd l="9336" t="12821" r="9984" b="13022">applied</wd>

<space/>

<wd l="10080" t="12869" r="10296" b="12979">on</wd>

<space/>

<wd l="10397" t="12869" r="10493" b="12979">a</wd>

<space/>

</ln>

<ln l="6130" t="13075" r="10483" b="13277" baseLine="13229" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6130" t="13075" r="6888" b="13234">cleansed</wd>

<space/>

<wd l="6965" t="13075" r="7603" b="13277">portion</wd>

<space/>

<wd l="7694" t="13075" r="7896" b="13234">of</wd>

<space/>

<wd l="7958" t="13123" r="8251" b="13234">our</wd>

<space/>

<wd l="8338" t="13075" r="8986" b="13234">Twitter</wd>

<space/>

<wd l="9072" t="13123" r="9696" b="13277">corpus.</wd>

<space/>

<wd l="9797" t="13075" r="10291" b="13234">Table</wd>

<space/>

<wd l="10382" t="13075" r="10483" b="13229">2</wd>

<space/>

</ln>

<ln l="6125" t="13330" r="10488" b="13531" baseLine="13478" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6125" t="13330" r="6624" b="13488">refers</wd>

<space/>

<wd l="6715" t="13349" r="6883" b="13488">to</wd>

<space/>

<wd l="6979" t="13378" r="7176" b="13488">an</wd>

<space/>

<wd l="7272" t="13330" r="8011" b="13531">example</wd>

<space/>

<wd l="8107" t="13330" r="8309" b="13488">of</wd>

<space/>

<wd l="8376" t="13378" r="8669" b="13488">our</wd>

<space/>

<wd l="8760" t="13330" r="9538" b="13488">achieved</wd>

<space/>

<wd l="9619" t="13330" r="10488" b="13531">positional</wd>

<space/>

</ln>

<ln l="6130" t="13584" r="10488" b="13786" baseLine="13733" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6130" t="13584" r="6614" b="13742">index</wd>

<space/>

<wd l="6715" t="13584" r="7368" b="13742">dataset.</wd>

<space/>

<wd l="7474" t="13584" r="7915" b="13742">Each</wd>

<space/>

<wd l="8016" t="13584" r="8664" b="13742">Twitter</wd>

<space/>

<wd l="8760" t="13632" r="9499" b="13786">message</wd>

<space/>

<wd l="9605" t="13584" r="9744" b="13742">is</wd>

<space/>

<wd l="9850" t="13584" r="10488" b="13742">consid-</wd>

</ln>

<ln l="6130" t="13834" r="10488" b="14035" baseLine="13987" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6130" t="13834" r="6509" b="13992">ered</wd>

<space/>

<wd l="6586" t="13882" r="6758" b="13992">as</wd>

<space/>

<wd l="6845" t="13882" r="6941" b="13992">a</wd>

<space/>

<wd l="7027" t="13834" r="7541" b="14035">single</wd>

<space/>

<wd l="7627" t="13834" r="8534" b="14026">document,</wd>

<space/>

<wd l="8621" t="13834" r="8986" b="14026">and,</wd>

<space/>

<wd l="9067" t="13834" r="9634" b="14026">hence,</wd>

<space/>

<wd l="9720" t="13882" r="9816" b="13992">a</wd>

<space/>

<wd l="9893" t="13834" r="10488" b="14035">unique</wd>

<space/>

</ln>

<ln l="6130" t="14088" r="10488" b="14290" baseLine="14237" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6130" t="14088" r="6994" b="14246">document</wd>

<space/>

<wd l="7070" t="14093" r="7291" b="14242">ID</wd>

<space/>

<wd l="7373" t="14088" r="7512" b="14246">is</wd>

<space/>

<wd l="7598" t="14088" r="8357" b="14290">assigned</wd>

<space/>

<wd l="8429" t="14107" r="8592" b="14246">to</wd>

<space/>

<wd l="8678" t="14088" r="9077" b="14246">each</wd>

<space/>

<wd l="9158" t="14088" r="10061" b="14246">document.</wd>

<space/>

<wd l="10152" t="14088" r="10488" b="14246">The</wd>

<space/>

</ln>

<ln l="6130" t="14342" r="10512" b="14544" baseLine="14491" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6130" t="14342" r="7008" b="14544">frequency</wd>

<space/>

<wd l="7123" t="14342" r="7598" b="14501">value</wd>

<space/>

<wd l="7723" t="14342" r="8496" b="14501">indicates</wd>

<space/>

<wd l="8621" t="14342" r="8885" b="14501">the</wd>

<space/>

<wd l="9005" t="14342" r="9394" b="14501">total</wd>

<space/>

<wd l="9518" t="14342" r="10190" b="14501">number</wd>

<space/>

<wd l="10315" t="14342" r="10512" b="14501">of</wd>

<space/>

</ln>

<ln l="6130" t="14592" r="10483" b="14794" baseLine="14746" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6130" t="14640" r="7195" b="14794">appearances</wd>

<space/>

<wd l="7277" t="14592" r="7478" b="14750">of</wd>

<space/>

<wd l="7536" t="14640" r="7632" b="14750">a</wd>

<space/>

<wd l="7699" t="14592" r="8160" b="14750">word</wd>

<space/>

<wd l="8227" t="14592" r="8395" b="14746">in</wd>

<space/>

<wd l="8472" t="14640" r="8568" b="14750">a</wd>

<space/>

<wd l="8640" t="14592" r="9547" b="14750">document.</wd>

<space/>

<wd l="9638" t="14592" r="9974" b="14750">The</wd>

<space/>

<wd l="10046" t="14592" r="10483" b="14794">posi-</wd>

</ln>

<ln l="6125" t="14846" r="10488" b="15048" baseLine="15000" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6125" t="14846" r="6470" b="15005">tion</wd>

<space/>

<wd l="6552" t="14846" r="7109" b="15005">values</wd>

<space/>

<wd l="7200" t="14894" r="7853" b="15048">express</wd>

<space/>

<wd l="7934" t="14846" r="8203" b="15005">the</wd>

<space/>

<wd l="8290" t="14846" r="9077" b="15005">locations</wd>

<space/>

<wd l="9168" t="14846" r="9370" b="15005">of</wd>

<space/>

<wd l="9432" t="14846" r="9696" b="15005">the</wd>

<space/>

<wd l="9782" t="14846" r="10238" b="15005">word</wd>

<space/>

<wd l="10320" t="14846" r="10488" b="15000">in</wd>

<space/>

</ln>

<ln l="6125" t="15101" r="7358" b="15259" baseLine="15250" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6125" t="15101" r="6394" b="15259">the</wd>

<space/>

<wd l="6456" t="15101" r="7358" b="15259">document.</wd>

</ln>

</para>

</column>

</section>

<dd l="5771" t="15736" r="6181" b="15977">

<para l="5804" t="15787" r="6148" b="15941" alignment="left" spaceBefore="4" lsp="exactly" lspExact="229" language="en">

<ln l="5870" t="15787" r="6082" b="15941" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="26">

<wd l="5870" t="15792" r="6082" b="15941">22</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4303.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1399" marginTop="1440" marginRight="1394" marginBottom="1302" offsetX="6" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1399" t="1459" r="10515" b="15407">

<column l="1399" t="1459" r="5801" b="15407">

<rulerline l="1399" t="1680" r="5801" b="1680" type="single" width="34" color="000000"/>

<table l="1399" t="1711" r="5801" b="3245" alignment="left" spaceBefore="252" spaceAfter="34">

<bottomBorder type="single" width="34"/>

<gridTable>

<gridCol>780</gridCol>

<gridCol>1454</gridCol>

<gridCol>1171</gridCol>

<gridCol>644</gridCol>

<gridCol>353</gridCol>

<gridRow>248</gridRow>

<gridRow>254</gridRow>

<gridRow>255</gridRow>

<gridRow>254</gridRow>

<gridRow>254</gridRow>

<gridRow>269</gridRow>

</gridTable>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<bottomBorder type="single" width="19"/>

<para l="1541" t="1747" r="2074" b="1906" alignment="left" li="118" spaceAfter="2" lsp="exactly" lspExact="245" language="en">

<ln l="1541" t="1747" r="2074" b="1906" baseLine="1896" italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="1541" t="1747" r="2074" b="1906">Vocab</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<bottomBorder type="single" width="19"/>

<para l="2285" t="1747" r="3518" b="1906" alignment="left" li="106" spaceAfter="2" lsp="exactly" lspExact="245" language="en">

<ln l="2285" t="1747" r="3518" b="1906" baseLine="1896" italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="2285" t="1752" r="3206" b="1906">Document</wd>

<space/>

<wd l="3245" t="1752" r="3518" b="1906">ID.</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<bottomBorder type="single" width="19"/>

<para l="3749" t="1752" r="4704" b="1949" alignment="left" li="116" spaceAfter="2" lsp="exactly" lspExact="245" language="en">

<ln l="3749" t="1752" r="4704" b="1949" baseLine="1896" italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="3749" t="1752" r="4704" b="1949">Frequency</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="4" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<bottomBorder type="single" width="19"/>

<para l="4906" t="1752" r="5645" b="1906" alignment="left" li="102" spaceAfter="2" lsp="exactly" lspExact="245" language="en">

<ln l="4906" t="1752" r="5645" b="1906" baseLine="1896" italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="4906" t="1752" r="5645" b="1906">Position</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="19"/>

<para l="1517" t="2011" r="1930" b="2170" alignment="left" li="118" lsp="exactly" lspExact="243" language="en">

<ln l="1517" t="2011" r="1930" b="2170" baseLine="2165" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1517" t="2011" r="1930" b="2170">have</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="19"/>

<para l="2318" t="2011" r="2381" b="2165" alignment="left" li="106" lsp="exactly" lspExact="243" language="en">

<ln l="2318" t="2011" r="2381" b="2165" baseLine="2165" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="2318" t="2011" r="2381" b="2165">1</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="19"/>

<para l="3763" t="2011" r="3864" b="2165" alignment="left" li="116" lsp="exactly" lspExact="243" language="en">

<ln l="3763" t="2011" r="3864" b="2165" baseLine="2165" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="3763" t="2011" r="3864" b="2165">2</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="19"/>

<para l="4915" t="2011" r="5179" b="2203" alignment="left" li="102" lsp="exactly" lspExact="243" language="en">

<ln l="4915" t="2011" r="5179" b="2203" baseLine="2165" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="4915" t="2011" r="5179" b="2203">4,9</wd>

</ln>

</para>

</cell>

<cell gridColFrom="4" gridColTill="4" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="top">

<topBorder type="single" width="19"/>

<para l="5448" t="1959" r="5801" b="2213" language="en">

<ln l="0" t="0" r="0" b="0" baseLine="0" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable">

<nl orig="true"/>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="top">

<para l="1399" t="2213" r="2179" b="2468" language="en">

<ln l="0" t="0" r="0" b="0" baseLine="0" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable">

<nl orig="true"/>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<para l="2294" t="2266" r="2400" b="2419" alignment="left" li="106" lsp="exactly" lspExact="247" language="en">

<ln l="2294" t="2266" r="2400" b="2419" baseLine="2419" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="2294" t="2266" r="2400" b="2419">4</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<para l="3763" t="2266" r="3854" b="2424" alignment="left" li="116" lsp="exactly" lspExact="247" language="en">

<ln l="3763" t="2266" r="3854" b="2424" baseLine="2419" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="3763" t="2266" r="3854" b="2424">3</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<para l="4925" t="2266" r="5405" b="2458" alignment="left" li="102" lsp="exactly" lspExact="247" language="en">

<ln l="4925" t="2266" r="5405" b="2458" baseLine="2419" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-10">

<wd l="4925" t="2270" r="5074" b="2458">5,</wd>

<space/>

<wd l="5160" t="2266" r="5405" b="2458">11,</wd>

</ln>

</para>

</cell>

<cell gridColFrom="4" gridColTill="4" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<para l="5491" t="2266" r="5678" b="2424" alignment="centered" lsp="exactly" lspExact="247" language="en">

<ln l="5491" t="2266" r="5678" b="2424" baseLine="2419" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-11">

<wd l="5491" t="2266" r="5678" b="2424">18</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<para l="1522" t="2525" r="1786" b="2678" alignment="left" li="118" lsp="exactly" lspExact="252" language="en">

<ln l="1522" t="2525" r="1786" b="2678" baseLine="2669" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1522" t="2568" r="1786" b="2678">are</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<para l="2304" t="2525" r="2390" b="2678" alignment="left" li="106" lsp="exactly" lspExact="252" language="en">

<ln l="2304" t="2525" r="2390" b="2678" baseLine="2669" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="2304" t="2525" r="2390" b="2678">5</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<para l="3782" t="2520" r="3845" b="2674" alignment="left" li="116" lsp="exactly" lspExact="252" language="en">

<ln l="3782" t="2520" r="3845" b="2674" baseLine="2669" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="3782" t="2520" r="3845" b="2674">1</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<para l="4920" t="2520" r="5021" b="2674" alignment="left" li="102" lsp="exactly" lspExact="252" language="en">

<ln l="4920" t="2520" r="5021" b="2674" baseLine="2669" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="4920" t="2520" r="5021" b="2674">2</wd>

</ln>

</para>

</cell>

<cell gridColFrom="4" gridColTill="4" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="top">

<para l="5448" t="2468" r="5801" b="2722" language="en">

<ln l="0" t="0" r="0" b="0" baseLine="0" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable">

<nl orig="true"/>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="top">

<para l="1399" t="2722" r="2179" b="2976" language="en">

<ln l="0" t="0" r="0" b="0" baseLine="0" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable">

<nl orig="true"/>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="middle">

<para l="2318" t="2774" r="2510" b="2928" alignment="left" li="106" lsp="exactly" lspExact="243" language="en">

<ln l="2318" t="2774" r="2510" b="2928" baseLine="2923" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-9">

<wd l="2318" t="2774" r="2510" b="2928">12</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="middle">

<para l="3763" t="2774" r="3864" b="2928" alignment="left" li="116" lsp="exactly" lspExact="243" language="en">

<ln l="3763" t="2774" r="3864" b="2928" baseLine="2923" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="3763" t="2774" r="3864" b="2928">2</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="middle">

<para l="4915" t="2774" r="5242" b="2966" alignment="left" li="102" lsp="exactly" lspExact="243" language="en">

<ln l="4915" t="2774" r="5242" b="2966" baseLine="2923" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="4915" t="2774" r="5074" b="2966">2,</wd>

<space/>

<wd l="5141" t="2774" r="5242" b="2933">9</wd>

</ln>

</para>

</cell>

<cell gridColFrom="4" gridColTill="4" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="top">

<para l="5448" t="2722" r="5801" b="2976" language="en">

<ln l="0" t="0" r="0" b="0" baseLine="0" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable">

<nl orig="true"/>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="5" gridRowTill="5" alignment="left" verticalAlignment="top">

<bottomBorder type="single" width="34"/>

<para l="1399" t="2976" r="2179" b="3245" language="en">

<ln l="0" t="0" r="0" b="0" baseLine="0" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable">

<nl orig="true"/>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="5" gridRowTill="5" alignment="left" verticalAlignment="middle">

<bottomBorder type="single" width="34"/>

<para l="2318" t="3024" r="2510" b="3178" alignment="left" li="106" spaceAfter="5" lsp="exactly" lspExact="253" language="en">

<ln l="2318" t="3024" r="2510" b="3178" baseLine="3178" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-9">

<wd l="2318" t="3024" r="2510" b="3178">14</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="5" gridRowTill="5" alignment="left" verticalAlignment="middle">

<bottomBorder type="single" width="34"/>

<para l="3763" t="3024" r="3864" b="3178" alignment="left" li="116" spaceAfter="5" lsp="exactly" lspExact="253" language="en">

<ln l="3763" t="3024" r="3864" b="3178" baseLine="3178" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="3763" t="3024" r="3864" b="3178">2</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="5" gridRowTill="5" alignment="left" verticalAlignment="middle">

<bottomBorder type="single" width="34"/>

<para l="4915" t="3024" r="5333" b="3216" alignment="left" li="102" spaceAfter="5" lsp="exactly" lspExact="253" language="en">

<ln l="4915" t="3024" r="5333" b="3216" baseLine="3178" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="4915" t="3024" r="5074" b="3216">2,</wd>

<space/>

<wd l="5160" t="3024" r="5333" b="3178">11</wd>

</ln>

</para>

</cell>

<cell gridColFrom="4" gridColTill="4" gridRowFrom="5" gridRowTill="5" alignment="left" verticalAlignment="top">

<bottomBorder type="single" width="34"/>

<para l="5448" t="2976" r="5801" b="3245" language="en">

<ln l="0" t="0" r="0" b="0" baseLine="0" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable">

<nl orig="true"/>

</ln>

</para>

</cell>

</table>

<para l="1421" t="3307" r="5779" b="3720" alignment="justified" lsp="exactly" lspExact="244" language="en">

<ln l="1421" t="3307" r="5779" b="3509" baseLine="3461" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="3307" r="1920" b="3466">Table</wd>

<space/>

<wd l="2021" t="3307" r="2170" b="3466">2:</wd>

<space/>

<wd l="2290" t="3307" r="2558" b="3461">An</wd>

<space/>

<wd l="2659" t="3307" r="3398" b="3509">example</wd>

<space/>

<wd l="3504" t="3307" r="3706" b="3466">of</wd>

<space/>

<wd l="3778" t="3307" r="4046" b="3466">the</wd>

<space/>

<wd l="4142" t="3307" r="5011" b="3509">positional</wd>

<space/>

<wd l="5117" t="3307" r="5779" b="3466">indexes</wd>

<space/>

</ln>

<ln l="1421" t="3562" r="2222" b="3720" baseLine="3710" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="3562" r="2222" b="3720">obtained.</wd>

</ln>

</para>

<para l="1411" t="4066" r="5794" b="5995" alignment="justified" spaceBefore="256" lsp="exactly" lspExact="252" language="en">

<ln l="1416" t="4066" r="5794" b="4267" baseLine="4219" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="4066" r="1574" b="4219">A</wd>

<space/>

<wd l="1632" t="4066" r="2606" b="4267">probability</wd>

<space/>

<wd l="2678" t="4114" r="3134" b="4224">score</wd>

<space/>

<wd l="3202" t="4066" r="3341" b="4224">is</wd>

<space/>

<wd l="3413" t="4066" r="4171" b="4267">assigned</wd>

<space/>

<wd l="4229" t="4085" r="4397" b="4224">to</wd>

<space/>

<wd l="4464" t="4066" r="4728" b="4224">the</wd>

<space/>

<wd l="4795" t="4066" r="5794" b="4224">normalized</wd>

<space/>

</ln>

<ln l="1421" t="4320" r="5784" b="4522" baseLine="4474" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="4320" r="2261" b="4478">candidate</wd>

<space/>

<wd l="2328" t="4320" r="3192" b="4522">according</wd>

<space/>

<wd l="3250" t="4339" r="3418" b="4478">to</wd>

<space/>

<wd l="3485" t="4368" r="3581" b="4478">a</wd>

<space/>

<wd l="3643" t="4320" r="4666" b="4522">comparison</wd>

<space/>

<wd l="4718" t="4320" r="5458" b="4478">between</wd>

<space/>

<wd l="5515" t="4320" r="5784" b="4478">the</wd>

<space/>

</ln>

<ln l="1411" t="4574" r="5779" b="4776" baseLine="4723" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="4574" r="2126" b="4776">position</wd>

<space/>

<wd l="2218" t="4574" r="2419" b="4733">of</wd>

<space/>

<wd l="2477" t="4574" r="2746" b="4733">the</wd>

<space/>

<wd l="2837" t="4574" r="3672" b="4733">candidate</wd>

<space/>

<wd l="3763" t="4574" r="4080" b="4733">and</wd>

<space/>

<wd l="4157" t="4574" r="5026" b="4776">positional</wd>

<space/>

<wd l="5117" t="4574" r="5779" b="4733">indexes</wd>

<space/>

</ln>

<ln l="1421" t="4824" r="5784" b="4982" baseLine="4978" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="4824" r="1589" b="4978">in</wd>

<space/>

<wd l="1675" t="4824" r="1939" b="4982">the</wd>

<space/>

<wd l="2030" t="4824" r="2683" b="4982">dataset.</wd>

<space/>

<wd l="2774" t="4829" r="3082" b="4982">We</wd>

<space/>

<wd l="3168" t="4824" r="3562" b="4982">look</wd>

<space/>

<wd l="3643" t="4824" r="3898" b="4982">for</wd>

<space/>

<wd l="3979" t="4824" r="4248" b="4982">the</wd>

<space/>

<wd l="4339" t="4824" r="5174" b="4982">candidate</wd>

<space/>

<wd l="5266" t="4824" r="5434" b="4978">in</wd>

<space/>

<wd l="5515" t="4824" r="5784" b="4982">the</wd>

<space/>

</ln>

<ln l="1421" t="5078" r="5784" b="5237" baseLine="5232" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="5078" r="2030" b="5237">dataset</wd>

<space/>

<wd l="2107" t="5078" r="2645" b="5237">where</wd>

<space/>

<wd l="2722" t="5078" r="3163" b="5237">there</wd>

<space/>

<wd l="3245" t="5078" r="3384" b="5237">is</wd>

<space/>

<wd l="3470" t="5126" r="3677" b="5237">an</wd>

<space/>

<wd l="3758" t="5126" r="4718" b="5237">occurrence</wd>

<space/>

<wd l="4805" t="5078" r="5006" b="5237">of</wd>

<space/>

<wd l="5059" t="5078" r="5328" b="5237">the</wd>

<space/>

<wd l="5414" t="5126" r="5784" b="5237">can-</wd>

</ln>

<ln l="1421" t="5333" r="5784" b="5534" baseLine="5482" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="5333" r="1954" b="5491">didate</wd>

<space/>

<wd l="2035" t="5333" r="2429" b="5491">with</wd>

<space/>

<wd l="2510" t="5333" r="2707" b="5491">its</wd>

<space/>

<wd l="2789" t="5333" r="3504" b="5534">position</wd>

<space/>

<wd l="3586" t="5333" r="4114" b="5491">index.</wd>

<space/>

<wd l="4205" t="5333" r="4670" b="5491">After</wd>

<space/>

<wd l="4752" t="5333" r="5784" b="5534">aggregating</wd>

<space/>

</ln>

<ln l="1416" t="5587" r="5779" b="5779" baseLine="5736" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="5587" r="1685" b="5746">the</wd>

<space/>

<wd l="1771" t="5587" r="2448" b="5746">number</wd>

<space/>

<wd l="2534" t="5587" r="2736" b="5746">of</wd>

<space/>

<wd l="2803" t="5635" r="3898" b="5779">occurrences,</wd>

<space/>

<wd l="3989" t="5635" r="4243" b="5746">we</wd>

<space/>

<wd l="4334" t="5587" r="5208" b="5746">normalize</wd>

<space/>

<wd l="5299" t="5587" r="5419" b="5746">it</wd>

<space/>

<wd l="5501" t="5587" r="5779" b="5746">be-</wd>

</ln>

<ln l="1416" t="5837" r="3019" b="5995" baseLine="5990" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="5856" r="1944" b="5995">tween</wd>

<space/>

<wd l="2006" t="5837" r="2270" b="5995">0.0</wd>

<space/>

<wd l="2333" t="5837" r="2654" b="5995">and</wd>

<space/>

<wd l="2726" t="5837" r="3019" b="5995">1.0.</wd>

</ln>

</para>

<para l="1406" t="6091" r="5789" b="11813" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="1651" t="6091" r="5784" b="6293" baseLine="6240" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="6091" r="1987" b="6250">The</wd>

<space/>

<wd l="2059" t="6110" r="2438" b="6250">next</wd>

<space/>

<wd l="2501" t="6091" r="3470" b="6293">probability</wd>

<space/>

<wd l="3542" t="6091" r="4502" b="6250">calculation</wd>

<space/>

<wd l="4574" t="6091" r="5237" b="6250">method</wd>

<space/>

<wd l="5304" t="6091" r="5443" b="6250">is</wd>

<space/>

<wd l="5515" t="6091" r="5784" b="6250">the</wd>

<space/>

</ln>

<ln l="1421" t="6346" r="5779" b="6547" baseLine="6494" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="6346" r="3048" b="6547">dependency-based</wd>

<space/>

<wd l="3154" t="6346" r="4075" b="6547">frequency,</wd>

<space/>

<wd l="4186" t="6346" r="4728" b="6504">which</wd>

<space/>

<wd l="4834" t="6346" r="4973" b="6504">is</wd>

<space/>

<wd l="5088" t="6394" r="5285" b="6504">an</wd>

<space/>

<wd l="5400" t="6394" r="5779" b="6547">aug-</wd>

</ln>

<ln l="1416" t="6595" r="5784" b="6797" baseLine="6749" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="6595" r="2299" b="6754">mentation</wd>

<space/>

<wd l="2381" t="6595" r="2578" b="6754">of</wd>

<space/>

<wd l="2630" t="6595" r="2899" b="6754">the</wd>

<space/>

<wd l="2971" t="6595" r="3730" b="6797">previous</wd>

<space/>

<wd l="3806" t="6595" r="4507" b="6754">method.</wd>

<space/>

<wd l="4603" t="6595" r="5323" b="6797">Inspired</wd>

<space/>

<wd l="5386" t="6595" r="5611" b="6797">by</wd>

<space/>

<wd l="5688" t="6643" r="5784" b="6754">a</wd>

<space/>

</ln>

<ln l="1416" t="6850" r="5779" b="7008" baseLine="7003" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="6850" r="1877" b="7008">work</wd>

<space/>

<wd l="2006" t="6898" r="2222" b="7008">on</wd>

<space/>

<wd l="2352" t="6850" r="2621" b="7008">the</wd>

<space/>

<wd l="2760" t="6850" r="3341" b="7008">lexical</wd>

<space/>

<wd l="3480" t="6850" r="4699" b="7008">normalization</wd>

<space/>

<wd l="4838" t="6850" r="5040" b="7008">of</wd>

<space/>

<wd l="5150" t="6854" r="5779" b="7008">Tweets</wd>

<space/>

</ln>

<ln l="1421" t="7104" r="5789" b="7306" baseLine="7253" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="7104" r="1853" b="7306">(Han</wd>

<space/>

<wd l="1930" t="7104" r="2093" b="7262">&amp;</wd>

<space/>

<wd l="2165" t="7104" r="2957" b="7296">Baldwin,</wd>

<space/>

<wd l="3034" t="7104" r="3590" b="7306">2011),</wd>

<space/>

<wd l="3662" t="7104" r="3926" b="7262">the</wd>

<space/>

<wd l="3998" t="7104" r="4474" b="7306">noisy</wd>

<space/>

<wd l="4536" t="7104" r="5174" b="7306">portion</wd>

<space/>

<wd l="5246" t="7104" r="5448" b="7262">of</wd>

<space/>

<wd l="5496" t="7152" r="5789" b="7262">our</wd>

<space/>

</ln>

<ln l="1416" t="7354" r="5784" b="7555" baseLine="7502" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="7354" r="2102" b="7555">training</wd>

<space/>

<wd l="2174" t="7354" r="2784" b="7512">dataset</wd>

<space/>

<wd l="2856" t="7354" r="2995" b="7512">is</wd>

<space/>

<wd l="3062" t="7354" r="3648" b="7555">parsed</wd>

<space/>

<wd l="3710" t="7373" r="3874" b="7512">to</wd>

<space/>

<wd l="3955" t="7354" r="4502" b="7512">obtain</wd>

<space/>

<wd l="4574" t="7402" r="4670" b="7512">a</wd>

<space/>

<wd l="4738" t="7354" r="5784" b="7555">dependency</wd>

<space/>

</ln>

<ln l="1411" t="7608" r="5789" b="7810" baseLine="7757" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="7608" r="1853" b="7766">bank</wd>

<space/>

<wd l="1934" t="7608" r="2414" b="7810">using</wd>

<space/>

<wd l="2506" t="7656" r="2798" b="7766">our</wd>

<space/>

<wd l="2890" t="7608" r="3576" b="7810">adapted</wd>

<space/>

<wd l="3658" t="7608" r="4306" b="7766">version</wd>

<space/>

<wd l="4402" t="7608" r="4603" b="7766">of</wd>

<space/>

<wd l="4670" t="7608" r="4934" b="7766">the</wd>

<space/>

<wd l="5035" t="7608" r="5789" b="7766">Stanford</wd>

<space/>

</ln>

<ln l="1421" t="7862" r="5779" b="8064" baseLine="8011" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="7862" r="2472" b="8064">dependency</wd>

<space/>

<wd l="2592" t="7910" r="3139" b="8064">parser</wd>

<space/>

<wd l="3269" t="7862" r="4205" b="8064">(Marneffe,</wd>

<space/>

<wd l="4344" t="7862" r="5477" b="8064">MacCartney,</wd>

<space/>

<wd l="5616" t="7862" r="5779" b="8021">&amp;</wd>

<space/>

</ln>

<ln l="1416" t="8117" r="5784" b="8318" baseLine="8266" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="8117" r="2261" b="8318">Manning,</wd>

<space/>

<wd l="2338" t="8117" r="2890" b="8318">2006).</wd>

<space/>

<wd l="2981" t="8117" r="3456" b="8275">Since</wd>

<space/>

<wd l="3528" t="8165" r="3821" b="8275">our</wd>

<space/>

<wd l="3888" t="8117" r="4214" b="8275">aim</wd>

<space/>

<wd l="4282" t="8117" r="4421" b="8275">is</wd>

<space/>

<wd l="4493" t="8136" r="4771" b="8275">not</wd>

<space/>

<wd l="4838" t="8136" r="5006" b="8275">to</wd>

<space/>

<wd l="5069" t="8117" r="5784" b="8318">perform</wd>

<space/>

</ln>

<ln l="1421" t="8366" r="5779" b="8568" baseLine="8515" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="8366" r="1939" b="8525">actual</wd>

<space/>

<wd l="2011" t="8366" r="3058" b="8568">dependency</wd>

<space/>

<wd l="3110" t="8366" r="3811" b="8568">parsing,</wd>

<space/>

<wd l="3878" t="8366" r="4147" b="8525">the</wd>

<space/>

<wd l="4214" t="8366" r="5261" b="8568">dependency</wd>

<space/>

<wd l="5318" t="8386" r="5779" b="8568">types</wd>

<space/>

</ln>

<ln l="1421" t="8621" r="5779" b="8822" baseLine="8770" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="8669" r="1685" b="8779">are</wd>

<space/>

<wd l="1776" t="8640" r="2059" b="8779">not</wd>

<space/>

<wd l="2150" t="8621" r="3000" b="8779">extracted.</wd>

<space/>

<wd l="3101" t="8621" r="3259" b="8774">A</wd>

<space/>

<wd l="3350" t="8621" r="4109" b="8779">cleansed</wd>

<space/>

<wd l="4200" t="8669" r="4776" b="8822">corpus</wd>

<space/>

<wd l="4877" t="8621" r="5016" b="8779">is</wd>

<space/>

<wd l="5107" t="8640" r="5390" b="8779">not</wd>

<space/>

<wd l="5477" t="8621" r="5779" b="8779">uti-</wd>

</ln>

<ln l="1421" t="8875" r="5784" b="9077" baseLine="9024" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="8875" r="1853" b="9034">lized</wd>

<space/>

<wd l="1910" t="8875" r="2606" b="9034">because</wd>

<space/>

<wd l="2674" t="8875" r="2942" b="9034">the</wd>

<space/>

<wd l="3005" t="8894" r="3960" b="9077">percentage</wd>

<space/>

<wd l="4032" t="8875" r="4234" b="9034">of</wd>

<space/>

<wd l="4282" t="8880" r="4507" b="9034">IV</wd>

<space/>

<wd l="4574" t="8875" r="5112" b="9034">words</wd>

<space/>

<wd l="5184" t="8875" r="5323" b="9034">is</wd>

<space/>

<wd l="5395" t="8875" r="5784" b="9077">high</wd>

<space/>

</ln>

<ln l="1421" t="9125" r="5779" b="9326" baseLine="9274" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="9125" r="2064" b="9326">enough</wd>

<space/>

<wd l="2189" t="9125" r="2357" b="9278">in</wd>

<space/>

<wd l="2477" t="9125" r="2746" b="9283">the</wd>

<space/>

<wd l="2870" t="9173" r="3499" b="9326">corpus,</wd>

<space/>

<wd l="3634" t="9125" r="3950" b="9283">and</wd>

<space/>

<wd l="4070" t="9125" r="4238" b="9278">in</wd>

<space/>

<wd l="4358" t="9125" r="4622" b="9283">the</wd>

<space/>

<wd l="4742" t="9125" r="5779" b="9326">probability-</wd>

</ln>

<ln l="1416" t="9379" r="5779" b="9581" baseLine="9528" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="9379" r="2338" b="9581">measuring</wd>

<space/>

<wd l="2390" t="9379" r="2947" b="9581">phase,</wd>

<space/>

<wd l="3019" t="9379" r="3490" b="9538">OOV</wd>

<space/>

<wd l="3552" t="9379" r="4085" b="9538">words</wd>

<space/>

<wd l="4152" t="9427" r="4416" b="9538">are</wd>

<space/>

<wd l="4483" t="9379" r="5126" b="9581">already</wd>

<space/>

<wd l="5189" t="9379" r="5779" b="9538">detect-</wd>

</ln>

<ln l="1421" t="9634" r="5789" b="9835" baseLine="9782">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1421" t="9634" r="1670" b="9792">ed.</wd>

<space/>

<wd l="1752" t="9638" r="2059" b="9792">For</wd>

<space/>

<wd l="2131" t="9634" r="2918" b="9835">example,</wd>

<space/>

<wd l="3000" t="9634" r="3427" b="9792">from</wd>

<space/>

<wd l="3494" t="9682" r="3590" b="9792">a</wd>

<space/>

<wd l="3667" t="9653" r="4416" b="9792">sentence</wd>

<space/>

<wd l="4498" t="9634" r="4891" b="9792">such</wd>

<space/>

<wd l="4963" t="9682" r="5141" b="9792">as</wd>

<space/>

</run>

<wd l="5218" t="9634" r="5400" b="9787"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">“</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">I</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="5453" t="9634" r="5789" b="9792">will</wd>

<space/>

</run>

</ln>

<ln l="1406" t="9888" r="5779" b="10090" baseLine="10037">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1406" t="9941" r="1632" b="10090">go</wd>

<space/>

<wd l="1709" t="9917" r="1872" b="10046">to</wd>

<space/>

<wd l="1934" t="9888" r="2611" b="10046">London</wd>

<space/>

<wd l="2688" t="9888" r="2894" b="10090">by</wd>

<space/>

<wd l="2962" t="9917" r="3331" b="10046">next</wd>

<space/>

</run>

<wd l="3394" t="9888" r="3984" b="10080"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">week</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,”</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="4056" t="9888" r="4541" b="10090"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">next</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4608" t="9941" r="4829" b="10090">go</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4901" t="9888" r="5203" b="10090">+3)</wd>

<space/>

<wd l="5280" t="9888" r="5419" b="10046">is</wd>

<space/>

<wd l="5496" t="9888" r="5779" b="10046">ob-</wd>

</run>

</ln>

<ln l="1416" t="10138" r="5779" b="10339" baseLine="10286">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1416" t="10138" r="2006" b="10330">tained,</wd>

<space/>

<wd l="2126" t="10138" r="3000" b="10339">indicating</wd>

<space/>

<wd l="3110" t="10138" r="3442" b="10296">that</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3552" t="10166" r="3922" b="10296">next</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4032" t="10186" r="4694" b="10339">appears</wd>

<space/>

<wd l="4805" t="10157" r="5131" b="10296">two</wd>

<space/>

<wd l="5246" t="10138" r="5779" b="10296">words</wd>

<space/>

</run>

</ln>

<ln l="1421" t="10387" r="5784" b="10589" baseLine="10541">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1421" t="10387" r="1824" b="10546">after</wd>

<space/>

</run>

<wd l="1882" t="10440" r="2155" b="10589"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">go</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="2242" t="10387" r="2578" b="10546">The</wd>

<space/>

<wd l="2659" t="10387" r="3773" b="10589">aggregations</wd>

<space/>

<wd l="3850" t="10387" r="4051" b="10546">of</wd>

<space/>

<wd l="4109" t="10387" r="4320" b="10546">all</wd>

<space/>

<wd l="4392" t="10387" r="4661" b="10546">the</wd>

<space/>

<wd l="4738" t="10387" r="5784" b="10589">dependency</wd>

<space/>

</run>

</ln>

<ln l="1426" t="10642" r="5784" b="10834" baseLine="10795" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="10690" r="2016" b="10834">scores,</wd>

<space/>

<wd l="2122" t="10642" r="2659" b="10800">which</wd>

<space/>

<wd l="2765" t="10690" r="3024" b="10800">are</wd>

<space/>

<wd l="3130" t="10642" r="3658" b="10800">called</wd>

<space/>

<wd l="3754" t="10642" r="4714" b="10800">confidence</wd>

<space/>

<wd l="4824" t="10690" r="5410" b="10834">scores,</wd>

<space/>

<wd l="5520" t="10690" r="5784" b="10800">are</wd>

<space/>

</ln>

<ln l="1426" t="10896" r="5779" b="11098" baseLine="11045" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="10896" r="1963" b="11054">stored</wd>

<space/>

<wd l="2040" t="10896" r="2208" b="11050">in</wd>

<space/>

<wd l="2285" t="10896" r="2549" b="11054">the</wd>

<space/>

<wd l="2635" t="10896" r="3682" b="11098">dependency</wd>

<space/>

<wd l="3754" t="10896" r="4229" b="11054">bank.</wd>

<space/>

<wd l="4320" t="10896" r="4478" b="11050">A</wd>

<space/>

<wd l="4560" t="10896" r="5424" b="11098">five-gram</wd>

<space/>

<wd l="5506" t="10896" r="5779" b="11054">de-</wd>

</ln>

<ln l="1411" t="11150" r="5784" b="11352" baseLine="11299" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="11150" r="2261" b="11352">pendency</wd>

<space/>

<wd l="2338" t="11150" r="2774" b="11309">bank</wd>

<space/>

<wd l="2856" t="11150" r="2995" b="11309">is</wd>

<space/>

<wd l="3077" t="11150" r="3859" b="11352">prepared</wd>

<space/>

<wd l="3931" t="11150" r="4608" b="11309">without</wd>

<space/>

<wd l="4690" t="11150" r="5170" b="11352">using</wd>

<space/>

<wd l="5251" t="11198" r="5347" b="11309">a</wd>

<space/>

<wd l="5429" t="11170" r="5784" b="11309">root</wd>

<space/>

</ln>

<ln l="1416" t="11400" r="5789" b="11602" baseLine="11554" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="11400" r="1843" b="11558">node</wd>

<space/>

<wd l="1925" t="11400" r="3058" b="11602">(head-word),</wd>

<space/>

<wd l="3139" t="11400" r="3470" b="11558">that</wd>

<space/>

<wd l="3552" t="11400" r="3739" b="11592">is,</wd>

<space/>

<wd l="3821" t="11400" r="4090" b="11558">the</wd>

<space/>

<wd l="4162" t="11448" r="4824" b="11602">process</wd>

<space/>

<wd l="4906" t="11400" r="5045" b="11558">is</wd>

<space/>

<wd l="5131" t="11400" r="5789" b="11558">iterated</wd>

<space/>

</ln>

<ln l="1421" t="11654" r="3946" b="11813" baseLine="11803" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="11654" r="1680" b="11813">for</wd>

<space/>

<wd l="1738" t="11654" r="1944" b="11813">all</wd>

<space/>

<wd l="2006" t="11654" r="2539" b="11813">words</wd>

<space/>

<wd l="2602" t="11654" r="2770" b="11808">in</wd>

<space/>

<wd l="2822" t="11654" r="3091" b="11813">the</wd>

<space/>

<wd l="3158" t="11674" r="3946" b="11813">sentence.</wd>

</ln>

</para>

<para l="1416" t="11909" r="5789" b="13882" alignment="justified" spaceBefore="2" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="1646" t="11909" r="5779" b="12110" baseLine="12058" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="11909" r="1805" b="12062">A</wd>

<space/>

<wd l="1867" t="11909" r="2842" b="12110">probability</wd>

<space/>

<wd l="2918" t="11957" r="3370" b="12067">score</wd>

<space/>

<wd l="3437" t="11909" r="4171" b="12067">between</wd>

<space/>

<wd l="4248" t="11909" r="4512" b="12067">0.0</wd>

<space/>

<wd l="4589" t="11909" r="4910" b="12067">and</wd>

<space/>

<wd l="4997" t="11909" r="5242" b="12067">1.0</wd>

<space/>

<wd l="5318" t="11909" r="5458" b="12067">is</wd>

<space/>

<wd l="5534" t="11957" r="5779" b="12067">as-</wd>

</ln>

<ln l="1426" t="12158" r="5784" b="12360" baseLine="12312" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="12158" r="1997" b="12360">signed</wd>

<space/>

<wd l="2126" t="12178" r="2294" b="12317">to</wd>

<space/>

<wd l="2434" t="12158" r="2832" b="12317">each</wd>

<space/>

<wd l="2966" t="12158" r="3850" b="12317">candidate.</wd>

<space/>

<wd l="3994" t="12158" r="4152" b="12312">A</wd>

<space/>

<wd l="4282" t="12158" r="4939" b="12317">relative</wd>

<space/>

<wd l="5069" t="12158" r="5784" b="12360">position</wd>

<space/>

</ln>

<ln l="1426" t="12413" r="5789" b="12614" baseLine="12566">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1426" t="12461" r="1882" b="12571">score</wd>

<space/>

<wd l="1997" t="12413" r="2165" b="12566">in</wd>

<space/>

<wd l="2270" t="12413" r="2539" b="12571">the</wd>

<space/>

<wd l="2654" t="12413" r="3082" b="12571">form</wd>

<space/>

<wd l="3192" t="12413" r="3394" b="12571">of</wd>

<space/>

</run>

<wd l="3485" t="12413" r="4421" b="12614"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">candidate</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="4531" t="12413" r="5030" b="12605"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">word</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="5155" t="12442" r="5789" b="12571">context</wd>

<space/>

</run>

</ln>

<ln l="1416" t="12667" r="5784" b="12869" baseLine="12816">

<wd l="1416" t="12667" r="1920" b="12859"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">word</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="2011" t="12667" r="2794" b="12869">position)</wd>

<space/>

<wd l="2890" t="12667" r="3029" b="12826">is</wd>

<space/>

<wd l="3130" t="12667" r="4018" b="12826">calculated</wd>

<space/>

<wd l="4109" t="12667" r="4363" b="12826">for</wd>

<space/>

<wd l="4454" t="12667" r="4853" b="12826">each</wd>

<space/>

<wd l="4944" t="12667" r="5784" b="12826">candidate</wd>

<space/>

</run>

</ln>

<ln l="1416" t="12917" r="5789" b="13075" baseLine="13070" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="12917" r="1982" b="13075">within</wd>

<space/>

<wd l="2059" t="12965" r="2155" b="13075">a</wd>

<space/>

<wd l="2227" t="12936" r="2870" b="13075">context</wd>

<space/>

<wd l="2942" t="12917" r="3653" b="13075">window</wd>

<space/>

<wd l="3730" t="12917" r="3926" b="13075">of</wd>

<space/>

<wd l="3979" t="12936" r="4306" b="13075">two</wd>

<space/>

<wd l="4382" t="12917" r="4915" b="13075">words</wd>

<space/>

<wd l="4997" t="12965" r="5213" b="13075">on</wd>

<space/>

<wd l="5290" t="12917" r="5789" b="13075">either</wd>

<space/>

</ln>

<ln l="1426" t="13171" r="5779" b="13373" baseLine="13325" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="13171" r="1819" b="13330">side.</wd>

<space/>

<wd l="1925" t="13171" r="2261" b="13330">The</wd>

<space/>

<wd l="2362" t="13171" r="3120" b="13330">obtained</wd>

<space/>

<wd l="3211" t="13171" r="3864" b="13330">relative</wd>

<space/>

<wd l="3955" t="13171" r="4675" b="13373">position</wd>

<space/>

<wd l="4771" t="13171" r="4973" b="13330">of</wd>

<space/>

<wd l="5050" t="13219" r="5146" b="13330">a</wd>

<space/>

<wd l="5242" t="13171" r="5779" b="13330">candi-</wd>

</ln>

<ln l="1421" t="13426" r="5784" b="13627" baseLine="13574" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="13426" r="1786" b="13584">date</wd>

<space/>

<wd l="1901" t="13426" r="2040" b="13584">is</wd>

<space/>

<wd l="2160" t="13426" r="3024" b="13627">compared</wd>

<space/>

<wd l="3130" t="13426" r="3523" b="13584">with</wd>

<space/>

<wd l="3634" t="13426" r="3898" b="13584">the</wd>

<space/>

<wd l="4018" t="13426" r="4709" b="13627">existing</wd>

<space/>

<wd l="4824" t="13426" r="5784" b="13584">confidence</wd>

<space/>

</ln>

<ln l="1426" t="13680" r="4061" b="13882" baseLine="13829" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="13728" r="1882" b="13838">score</wd>

<space/>

<wd l="1944" t="13680" r="2112" b="13834">in</wd>

<space/>

<wd l="2165" t="13680" r="2434" b="13838">the</wd>

<space/>

<wd l="2491" t="13680" r="3538" b="13882">dependency</wd>

<space/>

<wd l="3586" t="13680" r="4061" b="13838">bank.</wd>

</ln>

</para>

<para l="1416" t="13930" r="5789" b="15398" alignment="justified" fli="216" lsp="exactly" lspExact="251" language="en">

<ln l="1651" t="13930" r="5784" b="14131" baseLine="14083" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1651" t="13930" r="1987" b="14088">The</wd>

<space/>

<wd l="2069" t="13930" r="2491" b="14088">third</wd>

<space/>

<wd l="2563" t="13930" r="3230" b="14088">method</wd>

<space/>

<wd l="3307" t="13930" r="3509" b="14088">of</wd>

<space/>

<wd l="3562" t="13930" r="4536" b="14131">probability</wd>

<space/>

<wd l="4613" t="13949" r="5784" b="14088">measurement</wd>

<space/>

</ln>

<ln l="1421" t="14184" r="5779" b="14386" baseLine="14333" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="14184" r="2280" b="14342">calculates</wd>

<space/>

<wd l="2371" t="14184" r="2640" b="14342">the</wd>

<space/>

<wd l="2722" t="14184" r="3821" b="14386">probabilities</wd>

<space/>

<wd l="3907" t="14184" r="4421" b="14342">based</wd>

<space/>

<wd l="4502" t="14232" r="4718" b="14342">on</wd>

<space/>

<wd l="4810" t="14232" r="4906" b="14342">a</wd>

<space/>

<wd l="4992" t="14184" r="5779" b="14386">language</wd>

<space/>

</ln>

<ln l="1416" t="14438" r="5779" b="14640" baseLine="14587" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="14438" r="2011" b="14597">model.</wd>

<space/>

<wd l="2098" t="14438" r="2434" b="14597">The</wd>

<space/>

<wd l="2510" t="14438" r="3269" b="14597">cleansed</wd>

<space/>

<wd l="3331" t="14458" r="3677" b="14640">part</wd>

<space/>

<wd l="3754" t="14438" r="3950" b="14597">of</wd>

<space/>

<wd l="4003" t="14486" r="4296" b="14597">our</wd>

<space/>

<wd l="4363" t="14438" r="5050" b="14640">training</wd>

<space/>

<wd l="5122" t="14438" r="5779" b="14630">dataset,</wd>

<space/>

</ln>

<ln l="1416" t="14688" r="5789" b="14880" baseLine="14842" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="14688" r="1958" b="14846">which</wd>

<space/>

<wd l="2021" t="14688" r="2712" b="14846">consists</wd>

<space/>

<wd l="2779" t="14688" r="2981" b="14846">of</wd>

<space/>

<wd l="3019" t="14736" r="3470" b="14846">more</wd>

<space/>

<wd l="3533" t="14688" r="3907" b="14846">than</wd>

<space/>

<wd l="3984" t="14688" r="4574" b="14880">55,000</wd>

<space/>

<wd l="4642" t="14688" r="5227" b="14880">words,</wd>

<space/>

<wd l="5299" t="14688" r="5438" b="14846">is</wd>

<space/>

<wd l="5506" t="14688" r="5789" b="14846">fed</wd>

<space/>

</ln>

<ln l="1421" t="14942" r="5779" b="15144" baseLine="15096" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="14942" r="1757" b="15101">into</wd>

<space/>

<wd l="1829" t="14942" r="2482" b="15101">SRILM</wd>

<space/>

<wd l="2549" t="14942" r="3326" b="15144">(Stolcke,</wd>

<space/>

<wd l="3394" t="14942" r="3893" b="15144">2002)</wd>

<space/>

<wd l="3955" t="14962" r="4123" b="15101">to</wd>

<space/>

<wd l="4190" t="14942" r="4891" b="15144">compile</wd>

<space/>

<wd l="4954" t="14990" r="5050" b="15101">a</wd>

<space/>

<wd l="5098" t="14942" r="5779" b="15101">bidirec-</wd>

</ln>

<ln l="1416" t="15197" r="5784" b="15398" baseLine="15346" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="15197" r="1915" b="15355">tional</wd>

<space/>

<wd l="1987" t="15197" r="2635" b="15398">trigram</wd>

<space/>

<wd l="2698" t="15202" r="3024" b="15350">LM</wd>

<space/>

<wd l="3086" t="15197" r="3312" b="15398">by</wd>

<space/>

<wd l="3379" t="15197" r="4320" b="15398">employing</wd>

<space/>

<wd l="4382" t="15197" r="4651" b="15355">the</wd>

<space/>

<wd l="4718" t="15202" r="5784" b="15398">Kneser-Ney</wd>

<space/>

</ln>

</para>

</column>

<column l="6113" t="1459" r="10515" b="15407">

<para l="6125" t="1459" r="10483" b="2419" alignment="justified" lsp="exactly" lspExact="242" language="en">

<ln l="6130" t="1459" r="10483" b="1661" baseLine="1613" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6130" t="1459" r="7051" b="1661">smoothing</wd>

<space/>

<wd l="7138" t="1459" r="8035" b="1661">algorithm.</wd>

<space/>

<wd l="8136" t="1464" r="8371" b="1618">To</wd>

<space/>

<wd l="8467" t="1459" r="9240" b="1618">calculate</wd>

<space/>

<wd l="9326" t="1459" r="9595" b="1618">the</wd>

<space/>

<wd l="9677" t="1459" r="10483" b="1661">probabil-</wd>

</ln>

<ln l="6125" t="1714" r="10483" b="1915" baseLine="1862" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6125" t="1714" r="6355" b="1915">ity</wd>

<space/>

<wd l="6422" t="1714" r="6624" b="1872">of</wd>

<space/>

<wd l="6672" t="1714" r="7070" b="1872">each</wd>

<space/>

<wd l="7138" t="1714" r="8026" b="1906">candidate,</wd>

<space/>

<wd l="8093" t="1762" r="8347" b="1872">we</wd>

<space/>

<wd l="8414" t="1714" r="8822" b="1872">used</wd>

<space/>

<wd l="8885" t="1762" r="8981" b="1872">a</wd>

<space/>

<wd l="9038" t="1714" r="9518" b="1872">beam</wd>

<space/>

<wd l="9590" t="1714" r="10142" b="1872">search</wd>

<space/>

<wd l="10210" t="1714" r="10483" b="1872">de-</wd>

</ln>

<ln l="6125" t="1968" r="10483" b="2170" baseLine="2117" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6125" t="1968" r="6614" b="2126">coder</wd>

<space/>

<wd l="6691" t="1968" r="7378" b="2170">through</wd>

<space/>

<wd l="7459" t="1968" r="7723" b="2126">the</wd>

<space/>

<wd l="7805" t="1973" r="8376" b="2126">Moses</wd>

<space/>

<wd l="8467" t="1968" r="9163" b="2126">decoder</wd>

<space/>

<wd l="9245" t="1968" r="9902" b="2170">(Koehn</wd>

<space/>

<wd l="9984" t="1987" r="10142" b="2126">et</wd>

<space/>

<wd l="10224" t="1968" r="10483" b="2160">al.,</wd>

<space/>

</ln>

<ln l="6125" t="2218" r="6682" b="2419" baseLine="2371" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6125" t="2218" r="6682" b="2419">2007).</wd>

</ln>

</para>

<para l="6125" t="2635" r="10267" b="2837" alignment="left" spaceBefore="169" lsp="exactly" lspExact="249" language="en">

<ln l="6125" t="2635" r="10267" b="2837" baseLine="2789" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="5">

<wd l="6125" t="2635" r="6389" b="2794">5.2</wd>

<space/>

<wd l="6706" t="2635" r="7541" b="2837">Selecting</wd>

<space/>

<wd l="7598" t="2640" r="7886" b="2794">the</wd>

<space/>

<wd l="7949" t="2650" r="8395" b="2794">most</wd>

<space/>

<wd l="8453" t="2640" r="9293" b="2837">probable</wd>

<space/>

<wd l="9355" t="2635" r="10267" b="2794">candidate</wd>

</ln>

</para>

<para l="6115" t="3005" r="10493" b="6749" alignment="justified" spaceBefore="112" lsp="exactly" lspExact="251" language="en">

<ln l="6120" t="3005" r="10483" b="3206" baseLine="3158" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="3005" r="6888" b="3163">Previous</wd>

<space/>

<wd l="6989" t="3005" r="7522" b="3163">works</wd>

<space/>

<wd l="7622" t="3053" r="7838" b="3163">on</wd>

<space/>

<wd l="7944" t="3005" r="8630" b="3206">spelling</wd>

<space/>

<wd l="8726" t="3005" r="9619" b="3163">correction</wd>

<space/>

<wd l="9715" t="3005" r="10032" b="3163">and</wd>

<space/>

<wd l="10123" t="3053" r="10483" b="3163">nor-</wd>

</ln>

<ln l="6120" t="3259" r="10488" b="3451" baseLine="3408" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="3259" r="7051" b="3418">malization</wd>

<space/>

<wd l="7114" t="3259" r="7522" b="3418">used</wd>

<space/>

<wd l="7574" t="3259" r="7843" b="3418">the</wd>

<space/>

<wd l="7915" t="3307" r="8477" b="3418">source</wd>

<space/>

<wd l="8544" t="3259" r="9221" b="3418">channel</wd>

<space/>

<wd l="9283" t="3259" r="9883" b="3451">model,</wd>

<space/>

<wd l="9950" t="3259" r="10488" b="3418">which</wd>

<space/>

</ln>

<ln l="6125" t="3509" r="10493" b="3710" baseLine="3662" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="3509" r="6264" b="3667">is</wd>

<space/>

<wd l="6374" t="3509" r="6720" b="3667">also</wd>

<space/>

<wd l="6821" t="3509" r="7421" b="3667">known</wd>

<space/>

<wd l="7522" t="3557" r="7694" b="3667">as</wd>

<space/>

<wd l="7800" t="3509" r="8064" b="3667">the</wd>

<space/>

<wd l="8165" t="3509" r="8640" b="3710">noisy</wd>

<space/>

<wd l="8741" t="3509" r="9422" b="3667">channel</wd>

<space/>

<wd l="9523" t="3509" r="10070" b="3667">model</wd>

<space/>

<wd l="10176" t="3509" r="10493" b="3667">and</wd>

<space/>

</ln>

<ln l="6115" t="3763" r="10483" b="3965" baseLine="3917" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6115" t="3768" r="6643" b="3922">Naïve</wd>

<space/>

<wd l="6739" t="3768" r="7272" b="3965">Bayes</wd>

<space/>

<wd l="7373" t="3763" r="8213" b="3965">(Beaufort</wd>

<space/>

<wd l="8309" t="3782" r="8462" b="3922">et</wd>

<space/>

<wd l="8554" t="3763" r="8813" b="3955">al.,</wd>

<space/>

<wd l="8918" t="3763" r="9398" b="3955">2010;</wd>

<space/>

<wd l="9504" t="3763" r="10483" b="3965">Kernighan,</wd>

<space/>

</ln>

<ln l="6125" t="4018" r="10483" b="4219" baseLine="4166" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="4018" r="6821" b="4210">Church,</wd>

<space/>

<wd l="6989" t="4018" r="7152" b="4176">&amp;</wd>

<space/>

<wd l="7315" t="4018" r="7776" b="4210">Gale,</wd>

<space/>

<wd l="7963" t="4018" r="8424" b="4210">1990;</wd>

<space/>

<wd l="8597" t="4022" r="9134" b="4219">Mays,</wd>

<space/>

<wd l="9298" t="4022" r="10152" b="4210">Damerau,</wd>

<space/>

<wd l="10320" t="4018" r="10483" b="4176">&amp;</wd>

<space/>

</ln>

<ln l="6120" t="4272" r="10483" b="4464" baseLine="4421" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="4277" r="6806" b="4464">Mercer,</wd>

<space/>

<wd l="6922" t="4272" r="7382" b="4464">1991;</wd>

<space/>

<wd l="7483" t="4277" r="8424" b="4430">Toutanova</wd>

<space/>

<wd l="8506" t="4272" r="8669" b="4430">&amp;</wd>

<space/>

<wd l="8755" t="4277" r="9394" b="4464">Moore,</wd>

<space/>

<wd l="9490" t="4272" r="9970" b="4464">2002;</wd>

<space/>

<wd l="10066" t="4277" r="10483" b="4464">Xue,</wd>

<space/>

</ln>

<ln l="6120" t="4522" r="10483" b="4723" baseLine="4670" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="4522" r="6499" b="4714">Yin,</wd>

<space/>

<wd l="6619" t="4522" r="6782" b="4680">&amp;</wd>

<space/>

<wd l="6888" t="4522" r="7670" b="4714">Davison,</wd>

<space/>

<wd l="7786" t="4522" r="8448" b="4723">2011b).</wd>

<space/>

<wd l="8568" t="4526" r="8741" b="4675">In</wd>

<space/>

<wd l="8846" t="4522" r="9115" b="4680">the</wd>

<space/>

<wd l="9226" t="4522" r="9701" b="4723">noisy</wd>

<space/>

<wd l="9811" t="4522" r="10483" b="4680">channel</wd>

<space/>

</ln>

<ln l="6125" t="4776" r="10493" b="4978" baseLine="4925" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="4776" r="6979" b="4978">approach,</wd>

<space/>

<wd l="7051" t="4824" r="7306" b="4934">we</wd>

<space/>

<wd l="7378" t="4776" r="8054" b="4934">observe</wd>

<space/>

<wd l="8126" t="4776" r="8390" b="4934">the</wd>

<space/>

<wd l="8467" t="4776" r="9427" b="4934">conversion</wd>

<space/>

<wd l="9499" t="4776" r="9701" b="4934">of</wd>

<space/>

<wd l="9754" t="4776" r="10493" b="4934">standard</wd>

<space/>

</ln>

<ln l="6120" t="5030" r="10493" b="5232" baseLine="5179" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="5030" r="6658" b="5189">words</wd>

<space/>

<wd l="6720" t="5050" r="6888" b="5189">to</wd>

<space/>

<wd l="6955" t="5030" r="7430" b="5232">noisy</wd>

<space/>

<wd l="7493" t="5030" r="8026" b="5189">words</wd>

<space/>

<wd l="8098" t="5030" r="8266" b="5184">in</wd>

<space/>

<wd l="8328" t="5078" r="8424" b="5189">a</wd>

<space/>

<wd l="8482" t="5030" r="9168" b="5232">training</wd>

<space/>

<wd l="9226" t="5030" r="9730" b="5232">phase</wd>

<space/>

<wd l="9797" t="5030" r="9965" b="5184">in</wd>

<space/>

<wd l="10027" t="5030" r="10493" b="5189">order</wd>

<space/>

</ln>

<ln l="6120" t="5280" r="10483" b="5482" baseLine="5429" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="5299" r="6288" b="5438">to</wd>

<space/>

<wd l="6355" t="5280" r="6816" b="5438">build</wd>

<space/>

<wd l="6883" t="5328" r="6979" b="5438">a</wd>

<space/>

<wd l="7042" t="5280" r="7632" b="5438">model.</wd>

<space/>

<wd l="7714" t="5285" r="7891" b="5434">In</wd>

<space/>

<wd l="7954" t="5280" r="8222" b="5438">the</wd>

<space/>

<wd l="8285" t="5280" r="9187" b="5482">prediction</wd>

<space/>

<wd l="9250" t="5280" r="9802" b="5482">phase,</wd>

<space/>

<wd l="9874" t="5280" r="10142" b="5438">the</wd>

<space/>

<wd l="10214" t="5280" r="10483" b="5438">de-</wd>

</ln>

<ln l="6125" t="5534" r="10488" b="5736" baseLine="5683" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="5534" r="6614" b="5693">coder</wd>

<space/>

<wd l="6744" t="5582" r="7046" b="5693">can</wd>

<space/>

<wd l="7181" t="5534" r="7670" b="5693">select</wd>

<space/>

<wd l="7800" t="5534" r="8064" b="5693">the</wd>

<space/>

<wd l="8194" t="5554" r="8621" b="5693">most</wd>

<space/>

<wd l="8746" t="5534" r="9518" b="5736">probable</wd>

<space/>

<wd l="9653" t="5534" r="10488" b="5693">candidate</wd>

<space/>

</ln>

<ln l="6115" t="5789" r="10483" b="5947" baseLine="5938" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6115" t="5789" r="6629" b="5947">based</wd>

<space/>

<wd l="6710" t="5837" r="6926" b="5947">on</wd>

<space/>

<wd l="7008" t="5789" r="7272" b="5947">the</wd>

<space/>

<wd l="7363" t="5789" r="8122" b="5947">obtained</wd>

<space/>

<wd l="8198" t="5789" r="8789" b="5947">model.</wd>

<space/>

<wd l="8885" t="5789" r="9221" b="5947">The</wd>

<space/>

<wd l="9312" t="5789" r="10147" b="5947">candidate</wd>

<space/>

<wd l="10238" t="5837" r="10483" b="5947">se-</wd>

</ln>

<ln l="6125" t="6038" r="10483" b="6240" baseLine="6187" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="6038" r="6720" b="6197">lection</wd>

<space/>

<wd l="6826" t="6038" r="6965" b="6197">is</wd>

<space/>

<wd l="7070" t="6038" r="8280" b="6240">accomplished</wd>

<space/>

<wd l="8366" t="6038" r="8880" b="6197">based</wd>

<space/>

<wd l="8976" t="6086" r="9192" b="6197">on</wd>

<space/>

<wd l="9293" t="6038" r="9682" b="6240">only</wd>

<space/>

<wd l="9778" t="6058" r="10104" b="6197">two</wd>

<space/>

<wd l="10200" t="6086" r="10483" b="6240">pa-</wd>

</ln>

<ln l="6120" t="6293" r="10483" b="6485" baseLine="6442" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="6312" r="6922" b="6451">rameters:</wd>

<space/>

<wd l="7070" t="6293" r="7339" b="6451">the</wd>

<space/>

<wd l="7469" t="6298" r="7795" b="6446">LM</wd>

<space/>

<wd l="7934" t="6293" r="8256" b="6451">and</wd>

<space/>

<wd l="8386" t="6341" r="8813" b="6451">error</wd>

<space/>

<wd l="8938" t="6293" r="9538" b="6485">model,</wd>

<space/>

<wd l="9672" t="6293" r="10214" b="6451">which</wd>

<space/>

<wd l="10349" t="6293" r="10483" b="6451">is</wd>

<space/>

</ln>

<ln l="6125" t="6547" r="7987" b="6749" baseLine="6696" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="6547" r="6994" b="6749">computed</wd>

<space/>

<wd l="7051" t="6595" r="7224" b="6706">as</wd>

<space/>

<wd l="7286" t="6547" r="7987" b="6706">follows:</wd>

</ln>

</para>

<para l="6389" t="6778" r="8232" b="7070" alignment="left" li="288" spaceAfter="32" lsp="exactly" lspExact="345" language="en">

<ln l="6389" t="6778" r="8232" b="7070" baseLine="6994">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-11"><wd l="6389" t="6859" r="6528" b="7003">G</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-11"><wd l="6590" t="6922" r="6696" b="6974">=</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-11"><wd l="6758" t="6902" r="7003" b="7046">arg</wd>

<space/>

</run>

<wd l="7037" t="6778" r="7445" b="7070"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-11">max</run>

<run underlined="none" subsuperscript="none" fontSize="1350" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-11">{</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1350" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-11"><space/>

</run>

<wd l="7469" t="6778" r="7795" b="7070"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-11">P</run>

<run underlined="none" subsuperscript="none" fontSize="1350" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-11">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-11">T</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-11"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-11"><wd l="7862" t="6854" r="7872" b="7046">|</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-11"><wd l="7939" t="6859" r="8074" b="7003">O</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1350" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-11"><wd l="8093" t="6778" r="8232" b="7070">)}</wd>

</run>

</ln>

</para>

<para l="6667" t="7411" r="7459" b="7560" alignment="right" spaceBefore="204" spaceAfter="181" lsp="exactly" lspExact="240" language="en">

<ln l="6667" t="7411" r="7459" b="7560" baseLine="7512">

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="6667" t="7435" r="6773" b="7488">=</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="6835" t="7416" r="7080" b="7560">arg</wd>

<space/>

<wd l="7118" t="7416" r="7459" b="7517">max</wd>

</run>

</ln>

</para>

<para l="6120" t="7829" r="10493" b="8347" alignment="justified" spaceBefore="41" lsp="exactly" lspExact="298" language="en">

<ln l="6120" t="7829" r="10493" b="8030" baseLine="7978">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6120" t="7829" r="6706" b="7987">Where</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1300" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6816" t="7834" r="6941" b="7982">T</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7056" t="7829" r="7195" b="7987">is</wd>

<space/>

<wd l="7339" t="7877" r="7435" b="7987">a</wd>

<space/>

<wd l="7565" t="7848" r="8069" b="8030">target</wd>

<space/>

<wd l="8203" t="7829" r="8702" b="8021">word,</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1300" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="8827" t="7834" r="8952" b="7987">O</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="9072" t="7829" r="9211" b="7987">is</wd>

<space/>

<wd l="9355" t="7877" r="9562" b="7987">an</wd>

<space/>

<wd l="9701" t="7829" r="10493" b="7987">observed</wd>

<space/>

</run>

</ln>

<ln l="6120" t="8050" r="10493" b="8347" baseLine="8277">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6120" t="8122" r="6624" b="8314">word,</wd>

<space/>

</run>

<wd l="6821" t="8107" r="7046" b="8342"><run italic="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">f</run>

<run italic="true" underlined="none" subsuperscript="subscript" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">m</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1400" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" scale="750"><space/>

</run>

<wd l="7085" t="8050" r="7354" b="8347"><run underlined="none" subsuperscript="none" fontSize="1400" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" scale="750">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">T</run>

<run underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="7392" t="8050" r="7642" b="8347"><run italic="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">O</run>

<run underlined="none" subsuperscript="none" fontSize="1400" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" scale="750">)</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1400" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" scale="750"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7752" t="8122" r="7891" b="8280">is</wd>

<space/>

<wd l="8021" t="8170" r="8117" b="8280">a</wd>

<space/>

<wd l="8237" t="8122" r="8842" b="8280">feature</wd>

<space/>

<wd l="8971" t="8122" r="9749" b="8314">function,</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1300" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="9845" t="8126" r="10022" b="8275">M</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="10128" t="8122" r="10267" b="8280">is</wd>

<space/>

<wd l="10397" t="8170" r="10493" b="8280">a</wd>

</run>

</ln>

</para>

<para l="6120" t="8443" r="10488" b="10176" alignment="justified" spaceBefore="27" lsp="exactly" lspExact="253" language="en">

<ln l="6120" t="8443" r="10483" b="8635" baseLine="8592">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="6120" t="8443" r="6797" b="8602">number</wd>

<space/>

<wd l="6874" t="8443" r="7075" b="8602">of</wd>

<space/>

<wd l="7123" t="8443" r="7512" b="8602">total</wd>

<space/>

<wd l="7594" t="8443" r="8198" b="8602">feature</wd>

<space/>

<wd l="8280" t="8443" r="9139" b="8635">functions,</wd>

<space/>

<wd l="9226" t="8443" r="9542" b="8602">and</wd>

<space/>

<wd l="9610" t="8443" r="9720" b="8602">λ</wd>

<space/>

<wd l="9797" t="8443" r="9936" b="8602">is</wd>

<space/>

<wd l="10018" t="8491" r="10114" b="8602">a</wd>

<space/>

</run>

<wd l="10181" t="8448" r="10483" b="8602"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">L</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">a-</run>

</wd>

</ln>

<ln l="6125" t="8698" r="10483" b="8899" baseLine="8846" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6125" t="8746" r="6715" b="8899">grange</wd>

<space/>

<wd l="6806" t="8698" r="7680" b="8899">multiplier</wd>

<space/>

<wd l="7771" t="8698" r="7968" b="8856">of</wd>

<space/>

<wd l="8040" t="8698" r="8438" b="8856">each</wd>

<space/>

<wd l="8530" t="8698" r="9302" b="8856">function.</wd>

<space/>

<wd l="9408" t="8702" r="9586" b="8851">In</wd>

<space/>

<wd l="9677" t="8746" r="9970" b="8856">our</wd>

<space/>

<wd l="10061" t="8746" r="10483" b="8890">case,</wd>

<space/>

</ln>

<ln l="6125" t="8947" r="10483" b="9158" baseLine="9107">

<run underlined="none" subsuperscript="none" fontSize="1300" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="6125" t="8957" r="6302" b="9106">M</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="6374" t="8952" r="6926" b="9154">equals</wd>

<space/>

<wd l="7018" t="8952" r="7507" b="9144">three,</wd>

<space/>

<wd l="7608" t="8952" r="7776" b="9106">in</wd>

<space/>

<wd l="7862" t="8952" r="8400" b="9110">which</wd>

<space/>

</run>

<wd l="8448" t="8947" r="8621" b="9158"><run underlined="none" subsuperscript="none" fontSize="1300" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">f</run>

<run underlined="none" subsuperscript="none" fontSize="750" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">1</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="750" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="8698" t="8952" r="8837" b="9110">is</wd>

<space/>

<wd l="8928" t="8952" r="9197" b="9110">the</wd>

<space/>

<wd l="9283" t="8952" r="10147" b="9154">positional</wd>

<space/>

<wd l="10248" t="8952" r="10483" b="9106">in-</wd>

</run>

</ln>

<ln l="6125" t="9206" r="10488" b="9418" baseLine="9366">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="6125" t="9211" r="6768" b="9413">dexing,</wd>

<space/>

</run>

<wd l="6869" t="9206" r="7046" b="9418"><run underlined="none" subsuperscript="none" fontSize="1300" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">f</run>

<run underlined="none" subsuperscript="none" fontSize="750" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">2</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="750" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="7166" t="9211" r="7306" b="9370">is</wd>

<space/>

<wd l="7445" t="9211" r="7714" b="9370">the</wd>

<space/>

<wd l="7853" t="9211" r="9475" b="9413">dependency-based</wd>

<space/>

<wd l="9610" t="9211" r="10488" b="9413">frequency</wd>

<space/>

</run>

</ln>

<ln l="6125" t="9466" r="10488" b="9677" baseLine="9621">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="6125" t="9470" r="6782" b="9662">feature,</wd>

<space/>

<wd l="6960" t="9470" r="7277" b="9629">and</wd>

<space/>

</run>

<wd l="7397" t="9466" r="7570" b="9677"><run underlined="none" subsuperscript="none" fontSize="1300" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">f</run>

<run underlined="none" subsuperscript="none" fontSize="750" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">3</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="750" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="7723" t="9470" r="7862" b="9629">is</wd>

<space/>

<wd l="8030" t="9470" r="8299" b="9629">the</wd>

<space/>

<wd l="8467" t="9475" r="8794" b="9624">LM</wd>

<space/>

<wd l="8957" t="9470" r="9970" b="9672">probability.</wd>

<space/>

<wd l="10147" t="9470" r="10488" b="9629">The</wd>

<space/>

</run>

</ln>

<ln l="6120" t="9725" r="10483" b="9926" baseLine="9874">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="6120" t="9730" r="6797" b="9883">Maxent</wd>

<space/>

<wd l="6864" t="9725" r="7570" b="9926">requires</wd>

<space/>

<wd l="7642" t="9725" r="7752" b="9883">λ</wd>

<space/>

<wd l="7814" t="9725" r="8309" b="9926">being</wd>

<space/>

<wd l="8381" t="9725" r="9370" b="9883">determined</wd>

<space/>

<wd l="9437" t="9725" r="9605" b="9878">in</wd>

<space/>

<wd l="9672" t="9725" r="9941" b="9883">the</wd>

<space/>

</run>

<wd l="10013" t="9725" r="10483" b="9883"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">trai</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">n-</run>

</wd>

</ln>

<ln l="6125" t="9974" r="9077" b="10176" baseLine="10128" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6125" t="9974" r="6403" b="10176">ing</wd>

<space/>

<wd l="6451" t="9974" r="6960" b="10176">phase</wd>

<space/>

<wd l="7013" t="9974" r="7574" b="10133">before</wd>

<space/>

<wd l="7632" t="9974" r="7901" b="10133">the</wd>

<space/>

<wd l="7958" t="9974" r="8477" b="10133">actual</wd>

<space/>

<wd l="8534" t="10022" r="9077" b="10176">usage.</wd>

</ln>

</para>

<para l="6125" t="10435" r="10238" b="10656" alignment="left" spaceBefore="202" lsp="exactly" lspExact="279" language="en">

<ln l="6125" t="10435" r="10238" b="10656" baseLine="10598" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="6">

<wd l="6125" t="10440" r="6235" b="10608">6</wd>

<space/>

<wd l="6557" t="10440" r="7954" b="10656">Experimental</wd>

<space/>

<wd l="8016" t="10440" r="8683" b="10608">results</wd>

<space/>

<wd l="8760" t="10440" r="9134" b="10608">and</wd>

<space/>

<wd l="9206" t="10440" r="10238" b="10608">discussion</wd>

</ln>

</para>

<para l="6115" t="10862" r="10488" b="13550" alignment="justified" spaceBefore="154" lsp="exactly" lspExact="253" language="en">

<ln l="6120" t="10862" r="10488" b="11064" baseLine="11016" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="10867" r="6427" b="11021">We</wd>

<space/>

<wd l="6547" t="10862" r="7277" b="11021">evaluate</wd>

<space/>

<wd l="7402" t="10910" r="7690" b="11021">our</wd>

<space/>

<wd l="7810" t="10862" r="8611" b="11064">approach</wd>

<space/>

<wd l="8731" t="10862" r="8899" b="11016">in</wd>

<space/>

<wd l="9014" t="10882" r="9494" b="11021">terms</wd>

<space/>

<wd l="9619" t="10862" r="9821" b="11021">of</wd>

<space/>

<wd l="9917" t="10867" r="10488" b="11021">BLEU</wd>

<space/>

</ln>

<ln l="6130" t="11117" r="10483" b="11318" baseLine="11270" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="11165" r="6586" b="11275">score</wd>

<space/>

<wd l="6686" t="11117" r="7574" b="11318">(Papineni,</wd>

<space/>

<wd l="7680" t="11117" r="8400" b="11309">Roukos,</wd>

<space/>

<wd l="8501" t="11117" r="9038" b="11309">Ward,</wd>

<space/>

<wd l="9149" t="11117" r="9312" b="11275">&amp;</wd>

<space/>

<wd l="9413" t="11117" r="9816" b="11309">Zhu,</wd>

<space/>

<wd l="9926" t="11117" r="10483" b="11318">2002),</wd>

<space/>

</ln>

<ln l="6130" t="11371" r="10483" b="11530" baseLine="11520" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="11371" r="6571" b="11530">since</wd>

<space/>

<wd l="6648" t="11376" r="7224" b="11530">BLEU</wd>

<space/>

<wd l="7296" t="11371" r="7584" b="11530">has</wd>

<space/>

<wd l="7661" t="11371" r="8342" b="11530">become</wd>

<space/>

<wd l="8424" t="11419" r="8520" b="11530">a</wd>

<space/>

<wd l="8592" t="11371" r="9643" b="11530">well-known</wd>

<space/>

<wd l="9720" t="11371" r="10042" b="11530">and</wd>

<space/>

<wd l="10114" t="11371" r="10483" b="11530">ade-</wd>

</ln>

<ln l="6125" t="11626" r="10483" b="11827" baseLine="11774" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="11645" r="6600" b="11827">quate</wd>

<space/>

<wd l="6686" t="11626" r="7598" b="11784">evaluation</wd>

<space/>

<wd l="7680" t="11626" r="8232" b="11784">metric</wd>

<space/>

<wd l="8328" t="11626" r="8491" b="11779">in</wd>

<space/>

<wd l="8573" t="11626" r="9797" b="11784">normalization</wd>

<space/>

<wd l="9888" t="11626" r="10483" b="11784">studies</wd>

<space/>

</ln>

<ln l="6125" t="11875" r="10474" b="12077" baseLine="12029" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="11875" r="7186" b="12077">(Contractor,</wd>

<space/>

<wd l="7320" t="11875" r="8150" b="12077">Faruquie,</wd>

<space/>

<wd l="8290" t="11875" r="8453" b="12034">&amp;</wd>

<space/>

<wd l="8592" t="11875" r="9850" b="12067">Subramaniam,</wd>

<space/>

<wd l="9989" t="11875" r="10474" b="12067">2010;</wd>

<space/>

</ln>

<ln l="6130" t="12130" r="10488" b="12331" baseLine="12278" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="12130" r="6941" b="12331">Schlippe,</wd>

<space/>

<wd l="7027" t="12130" r="7426" b="12322">Zhu,</wd>

<space/>

<wd l="7517" t="12130" r="8376" b="12322">Gebhardt,</wd>

<space/>

<wd l="8462" t="12130" r="8626" b="12288">&amp;</wd>

<space/>

<wd l="8717" t="12130" r="9418" b="12322">Schultz,</wd>

<space/>

<wd l="9504" t="12130" r="10056" b="12331">2010).</wd>

<space/>

<wd l="10147" t="12130" r="10488" b="12288">The</wd>

<space/>

</ln>

<ln l="6125" t="12384" r="10464" b="12586" baseLine="12533" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="12384" r="6907" b="12542">achieved</wd>

<space/>

<wd l="6970" t="12384" r="7694" b="12542">baseline</wd>

<space/>

<wd l="7771" t="12384" r="8026" b="12542">for</wd>

<space/>

<wd l="8093" t="12384" r="8362" b="12542">the</wd>

<space/>

<wd l="8434" t="12384" r="9024" b="12586">testing</wd>

<space/>

<wd l="9096" t="12384" r="9706" b="12542">dataset</wd>

<space/>

<wd l="9778" t="12384" r="9917" b="12542">is</wd>

<space/>

<wd l="9994" t="12384" r="10464" b="12542">42.01</wd>

<space/>

</ln>

<ln l="6120" t="12634" r="10488" b="12835" baseLine="12787" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="12638" r="6696" b="12792">BLEU</wd>

<space/>

<wd l="6816" t="12682" r="7320" b="12826">score,</wd>

<space/>

<wd l="7440" t="12634" r="7771" b="12792">that</wd>

<space/>

<wd l="7886" t="12634" r="8078" b="12826">is,</wd>

<space/>

<wd l="8198" t="12634" r="8462" b="12792">the</wd>

<space/>

<wd l="8578" t="12634" r="9230" b="12792">volume</wd>

<space/>

<wd l="9350" t="12634" r="9552" b="12792">of</wd>

<space/>

<wd l="9653" t="12634" r="10488" b="12835">similarity</wd>

<space/>

</ln>

<ln l="6115" t="12888" r="10488" b="13090" baseLine="13042" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6115" t="12888" r="6854" b="13046">between</wd>

<space/>

<wd l="6955" t="12888" r="7224" b="13046">the</wd>

<space/>

<wd l="7330" t="12888" r="7915" b="13090">testing</wd>

<space/>

<wd l="8016" t="12907" r="8347" b="13046">text</wd>

<space/>

<wd l="8453" t="12888" r="8770" b="13046">and</wd>

<space/>

<wd l="8866" t="12888" r="9134" b="13046">the</wd>

<space/>

<wd l="9240" t="12888" r="10056" b="13046">reference</wd>

<space/>

<wd l="10157" t="12907" r="10488" b="13046">text</wd>

<space/>

</ln>

<ln l="6125" t="13142" r="10488" b="13344" baseLine="13291" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="13142" r="7018" b="13344">(manually</wd>

<space/>

<wd l="7138" t="13142" r="8136" b="13301">normalized</wd>

<space/>

<wd l="8251" t="13142" r="8650" b="13344">text)</wd>

<space/>

<wd l="8784" t="13142" r="8952" b="13296">in</wd>

<space/>

<wd l="9077" t="13162" r="9485" b="13301">term</wd>

<space/>

<wd l="9610" t="13142" r="9811" b="13301">of</wd>

<space/>

<wd l="9912" t="13147" r="10488" b="13301">BLEU</wd>

<space/>

</ln>

<ln l="6130" t="13397" r="6629" b="13550" baseLine="13546" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="13440" r="6629" b="13550">score.</wd>

</ln>

</para>

<para l="6115" t="13646" r="10488" b="15365" alignment="justified" spaceAfter="19" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="6355" t="13646" r="10488" b="13848" baseLine="13800" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="13651" r="6528" b="13800">In</wd>

<space/>

<wd l="6595" t="13646" r="6864" b="13805">the</wd>

<space/>

<wd l="6931" t="13646" r="7618" b="13848">training</wd>

<space/>

<wd l="7675" t="13646" r="8227" b="13848">phase,</wd>

<space/>

<wd l="8299" t="13694" r="8554" b="13805">we</wd>

<space/>

<wd l="8611" t="13646" r="9538" b="13848">performed</wd>

<space/>

<wd l="9595" t="13646" r="10488" b="13805">maximum</wd>

<space/>

</ln>

<ln l="6125" t="13901" r="10483" b="14102" baseLine="14050" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="13901" r="7022" b="14059">likelihood</wd>

<space/>

<wd l="7104" t="13901" r="7790" b="14102">training</wd>

<space/>

<wd l="7882" t="13901" r="8770" b="14102">(Papineni,</wd>

<space/>

<wd l="8866" t="13901" r="9586" b="14093">Roukos,</wd>

<space/>

<wd l="9686" t="13901" r="9850" b="14059">&amp;</wd>

<space/>

<wd l="9941" t="13901" r="10483" b="14093">Ward,</wd>

<space/>

</ln>

<ln l="6144" t="14155" r="10469" b="14357" baseLine="14307">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6144" t="14155" r="6610" b="14347">1998;</wd>

<space/>

<wd l="6691" t="14155" r="7157" b="14314">Streit</wd>

<space/>

<wd l="7219" t="14155" r="7382" b="14314">&amp;</wd>

<space/>

<wd l="7440" t="14155" r="8405" b="14357">Luginbuhl,</wd>

<space/>

<wd l="8491" t="14155" r="8971" b="14357">1994)</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="9038" t="14155" r="9293" b="14314">for</wd>

<space/>

</run>

<wd l="9350" t="14155" r="9595" b="14347"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">X</run>

<run underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">1</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="9662" t="14155" r="9845" b="14338"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">X</run>

<run underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">2</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="9917" t="14155" r="10238" b="14314">and</wd>

<space/>

</run>

<wd l="10286" t="14155" r="10469" b="14342"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">X</run>

<run underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">3</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

</ln>

<ln l="6115" t="14405" r="10483" b="14606" baseLine="14558" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6115" t="14405" r="6854" b="14563">between</wd>

<space/>

<wd l="6950" t="14405" r="7219" b="14563">0.0</wd>

<space/>

<wd l="7320" t="14405" r="7637" b="14563">and</wd>

<space/>

<wd l="7747" t="14405" r="8040" b="14563">1.0.</wd>

<space/>

<wd l="8146" t="14405" r="8717" b="14606">Figure</wd>

<space/>

<wd l="8832" t="14405" r="8894" b="14558">1</wd>

<space/>

<wd l="9019" t="14405" r="9557" b="14563">shows</wd>

<space/>

<wd l="9653" t="14405" r="9917" b="14563">the</wd>

<space/>

<wd l="10013" t="14405" r="10483" b="14563">toler-</wd>

</ln>

<ln l="6125" t="14659" r="10459" b="14861" baseLine="14809">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6125" t="14707" r="6523" b="14818">ance</wd>

<space/>

<wd l="6629" t="14659" r="6830" b="14818">of</wd>

<space/>

<wd l="6907" t="14659" r="7176" b="14818">the</wd>

<space/>

<wd l="7272" t="14659" r="8386" b="14861">performance</wd>

<space/>

<wd l="8486" t="14659" r="8976" b="14818">while</wd>

<space/>

<wd l="9077" t="14659" r="9907" b="14818">transition</wd>

<space/>

<wd l="10013" t="14659" r="10214" b="14818">of</wd>

<space/>

</run>

<wd l="10291" t="14659" r="10459" b="14842"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">X</run>

<run underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">1</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

</ln>

<ln l="6125" t="14914" r="10488" b="15115" baseLine="15064">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6125" t="14914" r="6446" b="15072">and</wd>

<space/>

</run>

<wd l="6499" t="14914" r="6686" b="15096"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">X</run>

<run underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">2</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6768" t="14914" r="7315" b="15115">(when</wd>

<space/>

</run>

<wd l="7373" t="14914" r="7555" b="15101"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">X</run>

<run underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">3</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7642" t="14914" r="7781" b="15072">is</wd>

<space/>

<wd l="7853" t="14914" r="8304" b="15072">fixed</wd>

<space/>

<wd l="8357" t="14933" r="8525" b="15072">to</wd>

<space/>

<wd l="8616" t="14914" r="8986" b="15115">1.0).</wd>

<space/>

<wd l="9058" t="14914" r="9629" b="15115">Figure</wd>

<space/>

<wd l="9715" t="14914" r="9778" b="15067">1</wd>

<space/>

<wd l="9874" t="14914" r="10488" b="15115">depicts</wd>

<space/>

</run>

</ln>

<ln l="6120" t="15163" r="10478" b="15365" baseLine="15317">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6120" t="15163" r="6451" b="15322">that</wd>

<space/>

<wd l="6523" t="15163" r="6792" b="15322">the</wd>

<space/>

<wd l="6869" t="15163" r="7339" b="15322">value</wd>

<space/>

<wd l="7421" t="15163" r="7618" b="15322">of</wd>

<space/>

<wd l="7666" t="15163" r="8779" b="15365">performance</wd>

<space/>

<wd l="8861" t="15163" r="9610" b="15322">achieves</wd>

<space/>

<wd l="9682" t="15163" r="9950" b="15322">the</wd>

<space/>

<wd l="10027" t="15163" r="10478" b="15365">high-</wd>

</run>

<run fontFace="Times New Roman" fontFamily="roman" fontPitch="variable"><nl orig="true"/>

</run>

</ln>

</para>

</column>

</section>

<dd l="7464" t="7122" r="10515" b="7750">

<dd l="8592" t="7122" r="10515" b="7750">

<para l="8592" t="7176" r="9296" b="7728" alignment="left" spaceBefore="54" spaceAfter="19" lsp="exactly" lspExact="552" language="en">

<ln l="8621" t="7176" r="9230" b="7728" baseLine="7512">

<wd l="8621" t="7291" r="8866" b="7584"><run underlined="none" subsuperscript="none" fontSize="1350" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="16">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="16">T</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="16">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="16"><space/>

</run>

<wd l="8904" t="7176" r="9230" b="7728"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="16">O</run>

<run underlined="none" subsuperscript="none" fontSize="1350" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="16">)</run>

<run underlined="none" subsuperscript="subscript" fontSize="1350" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="16"></run>

<run underlined="none" subsuperscript="none" fontSize="1350" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="16"></run>

<run underlined="none" subsuperscript="superscript" fontSize="1350" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="16"></run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1350" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="16"><nl orig="true"/>

</run>

</ln>

</para>

</dd>

<dd l="7464" t="7122" r="7589" b="7750">

<para l="7464" t="7320" r="7589" b="7704" alignment="left" spaceBefore="198" spaceAfter="43" lsp="exactly" lspExact="384" language="en">

<ln l="7469" t="7320" r="7565" b="7704" baseLine="7550" underlined="none" subsuperscript="none" fontSize="1350" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="7469" t="7320" r="7565" b="7704">Y</wd>

</ln>

</para>

</dd>

<dd l="7589" t="7122" r="7934" b="7550">

<para l="7589" t="7195" r="7904" b="7334" alignment="left" spaceBefore="29" lsp="exactly" lspExact="207" language="en">

<ln l="7642" t="7195" r="7838" b="7334" baseLine="7330" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="7642" t="7195" r="7838" b="7334">M</wd>

</ln>

</para>

<para l="7609" t="7363" r="7876" b="7541" alignment="left" lsp="exactly" lspExact="187" language="en">

<ln l="7675" t="7363" r="7810" b="7541" baseLine="7517" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="7675" t="7363" r="7810" b="7541">∑</wd>

</ln>

</para>

</dd>

<dd l="7934" t="7122" r="8261" b="7750">

<para l="7934" t="7368" r="8260" b="7579" alignment="left" spaceBefore="235" spaceAfter="150" lsp="exactly" lspExact="240" language="en">

<ln l="7934" t="7368" r="8194" b="7579" baseLine="7550">

<wd l="7934" t="7368" r="8194" b="7579"><run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-7">A</run>

<run italic="true" underlined="none" subsuperscript="subscript" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-7">m</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-7"><nl orig="true"/>

</run>

</ln>

</para>

</dd>

<dd l="8261" t="7122" r="8592" b="7750">

<para l="8261" t="7368" r="8592" b="7579" alignment="left" li="72" spaceBefore="207" spaceAfter="160" lsp="exactly" lspExact="258" language="en">

<bullet type="bulleted" value="smallCircle" numChars="1">

</bullet>

<ln l="8261" t="7368" r="8587" b="7579" baseLine="7550">

<wd l="8261" t="7416" r="8333" b="7512">•</wd>

<wd l="8338" t="7368" r="8587" b="7579"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">f</run>

<run italic="true" underlined="none" subsuperscript="subscript" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">m</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5"><nl orig="true"/>

</run>

</ln>

</para>

</dd>

<dd l="7589" t="7550" r="7934" b="7750">

<para l="7589" t="7550" r="7934" b="7694" alignment="left" lsp="exactly" lspExact="197" language="en">

<ln l="7589" t="7550" r="7906" b="7694" baseLine="7690">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-36"><wd l="7589" t="7598" r="7728" b="7694">m</wd>

<space/>

</run>

<wd l="7738" t="7550" r="7906" b="7690"><run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-36">=</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-36">1</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-36"><nl orig="true"/>

</run>

</ln>

</para>

</dd>

</dd>

<dd l="1399" t="15736" r="10515" b="15977">

<para l="5804" t="15792" r="6138" b="15946" alignment="centered" spaceBefore="4" lsp="exactly" lspExact="229" language="en">

<ln l="5870" t="15792" r="6072" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="38">

<wd l="5870" t="15792" r="6072" b="15946">23</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4303.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1291" marginTop="2429" marginRight="1332" marginBottom="1302" offsetX="54" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<dd l="1416" t="1417" r="5789" b="2429">

<para l="1416" t="1464" r="5784" b="2424" alignment="justified" lsp="exactly" lspExact="250" language="en">

<ln l="1421" t="1464" r="5779" b="1656" baseLine="1615">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1421" t="1483" r="1666" b="1622">est</wd>

<space/>

<wd l="1733" t="1464" r="2208" b="1622">when</wd>

<space/>

<wd l="2270" t="1464" r="2539" b="1622">the</wd>

<space/>

</run>

<wd l="2606" t="1464" r="2779" b="1646"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">X</run>

<run underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">1</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="2880" t="1464" r="3197" b="1622">and</wd>

<space/>

</run>

<wd l="3259" t="1464" r="3446" b="1646"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">X</run>

<run underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">2</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="3533" t="1512" r="3792" b="1622">are</wd>

<space/>

<wd l="3864" t="1464" r="4310" b="1622">close</wd>

<space/>

<wd l="4382" t="1483" r="4546" b="1622">to</wd>

<space/>

<wd l="4622" t="1464" r="4992" b="1622">0.63</wd>

<space/>

<wd l="5074" t="1464" r="5395" b="1622">and</wd>

<space/>

<wd l="5458" t="1464" r="5779" b="1656">0.9,</wd>

<space/>

</run>

</ln>

<ln l="1416" t="1714" r="5784" b="1915" baseLine="1867" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="1714" r="2525" b="1915">respectively.</wd>

<space/>

<wd l="2597" t="1718" r="2722" b="1872">It</wd>

<space/>

<wd l="2784" t="1714" r="2923" b="1872">is</wd>

<space/>

<wd l="2986" t="1714" r="3504" b="1872">found</wd>

<space/>

<wd l="3552" t="1714" r="3883" b="1872">that</wd>

<space/>

<wd l="3936" t="1714" r="4205" b="1872">the</wd>

<space/>

<wd l="4258" t="1714" r="4618" b="1872">best</wd>

<space/>

<wd l="4666" t="1714" r="5784" b="1915">performance</wd>

<space/>

</ln>

<ln l="1421" t="1963" r="5779" b="2170" baseLine="2123">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1421" t="1968" r="1560" b="2126">is</wd>

<space/>

<wd l="1632" t="1968" r="2414" b="2126">achieved</wd>

<space/>

<wd l="2472" t="1968" r="2698" b="2170">by</wd>

<space/>

<wd l="2765" t="1968" r="3082" b="2160">0.6,</wd>

<space/>

<wd l="3158" t="1968" r="3475" b="2160">0.9,</wd>

<space/>

<wd l="3547" t="1968" r="3869" b="2126">and</wd>

<space/>

<wd l="3950" t="1968" r="4195" b="2126">1.0</wd>

<space/>

<wd l="4262" t="1968" r="4824" b="2126">values</wd>

<space/>

<wd l="4896" t="1968" r="5150" b="2126">for</wd>

<space/>

</run>

<wd l="5208" t="1968" r="5458" b="2160"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">X</run>

<run underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">1</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

</run>

<wd l="5534" t="1968" r="5779" b="2160"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">X</run>

<run underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">2</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

</run>

</ln>

<ln l="1421" t="2222" r="5784" b="2424" baseLine="2371">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1421" t="2222" r="1742" b="2381">and</wd>

<space/>

</run>

<wd l="1805" t="2222" r="2050" b="2414"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">X</run>

<run underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">3</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

</run>

<run underlined="single" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="2117" t="2222" r="3221" b="2424">respectively.</wd>

<space/>

<wd l="3302" t="2222" r="3686" b="2381">This</wd>

<space/>

<wd l="3749" t="2270" r="4306" b="2381">means</wd>

<space/>

<wd l="4373" t="2222" r="4709" b="2381">that</wd>

<space/>

<wd l="4771" t="2227" r="5093" b="2376">LM</wd>

<space/>

<wd l="5160" t="2222" r="5448" b="2381">has</wd>

<space/>

<wd l="5520" t="2222" r="5784" b="2381">the</wd>

</run>

</ln>

</para>

</dd>

<dd l="6120" t="1417" r="10498" b="2180">

<para l="6120" t="1464" r="10493" b="2170" alignment="justified" spaceBefore="4" lsp="exactly" lspExact="251" language="en">

<ln l="6120" t="1464" r="10493" b="1666" baseLine="1613" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="1464" r="6758" b="1666">highest</wd>

<space/>

<wd l="6869" t="1464" r="7464" b="1666">impact</wd>

<space/>

<wd l="7579" t="1512" r="7790" b="1622">on</wd>

<space/>

<wd l="7901" t="1464" r="8165" b="1622">the</wd>

<space/>

<wd l="8280" t="1464" r="9115" b="1622">candidate</wd>

<space/>

<wd l="9235" t="1464" r="10056" b="1656">selection,</wd>

<space/>

<wd l="10176" t="1464" r="10493" b="1622">and</wd>

<space/>

</ln>

<ln l="6120" t="1714" r="10493" b="1915" baseLine="1867" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="1714" r="6451" b="1872">that</wd>

<space/>

<wd l="6571" t="1714" r="8198" b="1915">dependency-based</wd>

<space/>

<wd l="8314" t="1714" r="9192" b="1915">frequency</wd>

<space/>

<wd l="9302" t="1714" r="9595" b="1872">has</wd>

<space/>

<wd l="9720" t="1762" r="9816" b="1872">a</wd>

<space/>

<wd l="9931" t="1714" r="10493" b="1915">higher</wd>

<space/>

</ln>

<ln l="6125" t="1968" r="10133" b="2170" baseLine="2122" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="1968" r="6720" b="2170">impact</wd>

<space/>

<wd l="6782" t="2016" r="6998" b="2126">on</wd>

<space/>

<wd l="7056" t="1968" r="7891" b="2126">candidate</wd>

<space/>

<wd l="7958" t="1968" r="8731" b="2126">selection</wd>

<space/>

<wd l="8784" t="1968" r="9158" b="2126">than</wd>

<space/>

<wd l="9216" t="1968" r="10133" b="2170">positional.</wd>

</ln>

</para>

</dd>

<dd l="2578" t="2429" r="9538" b="5198">

<picture l="2578" t="2429" r="9538" b="5198" alignment="left">

</picture>

</dd>

<dd l="2623" t="2743" r="3056" b="2975">

<para l="2656" t="2808" r="3023" b="2942" alignment="left" spaceBefore="27" lsp="exactly" lspExact="196" language="en">

<ln l="2722" t="2808" r="2957" b="2942" baseLine="2938" underlined="none" subsuperscript="none" fontSize="900" fontFace="Verdana" fontFamily="swiss" fontPitch="variable" spacing="6">

<wd l="2722" t="2808" r="2957" b="2942">0.9</wd>

</ln>

</para>

</dd>

<dd l="2623" t="2975" r="3061" b="3205">

<para l="2656" t="3043" r="3028" b="3178" alignment="left" spaceBefore="25" lsp="exactly" lspExact="196" language="en">

<ln l="2722" t="3043" r="2962" b="3178" baseLine="3168" underlined="none" subsuperscript="none" fontSize="900" fontFace="Verdana" fontFamily="swiss" fontPitch="variable" spacing="7">

<wd l="2722" t="3043" r="2962" b="3178">0.8</wd>

</ln>

</para>

</dd>

<dd l="2623" t="3205" r="3061" b="3435">

<para l="2656" t="3274" r="3028" b="3408" alignment="left" spaceBefore="25" lsp="exactly" lspExact="197" language="en">

<ln l="2722" t="3274" r="2962" b="3408" baseLine="3398" underlined="none" subsuperscript="none" fontSize="900" fontFace="Verdana" fontFamily="swiss" fontPitch="variable" spacing="7">

<wd l="2722" t="3274" r="2962" b="3408">0.7</wd>

</ln>

</para>

</dd>

<dd l="2623" t="3435" r="3061" b="3666">

<para l="2656" t="3504" r="3028" b="3638" alignment="left" spaceBefore="26" lsp="exactly" lspExact="196" language="en">

<ln l="2722" t="3504" r="2962" b="3638" baseLine="3629" underlined="none" subsuperscript="none" fontSize="900" fontFace="Verdana" fontFamily="swiss" fontPitch="variable" spacing="7">

<wd l="2722" t="3504" r="2962" b="3638">0.6</wd>

</ln>

</para>

</dd>

<dd l="2623" t="3669" r="3056" b="3901">

<para l="2656" t="3734" r="3023" b="3869" alignment="left" spaceBefore="27" lsp="exactly" lspExact="192" language="en">

<ln l="2722" t="3734" r="2957" b="3869" baseLine="3864" underlined="none" subsuperscript="none" fontSize="900" fontFace="Verdana" fontFamily="swiss" fontPitch="variable" spacing="6">

<wd l="2722" t="3734" r="2957" b="3869">0.5</wd>

</ln>

</para>

</dd>

<dd l="2623" t="3901" r="3061" b="4131">

<para l="2656" t="3965" r="3028" b="4099" alignment="left" spaceBefore="25" lsp="exactly" lspExact="192" language="en">

<ln l="2722" t="3965" r="2962" b="4099" baseLine="4094" underlined="none" subsuperscript="none" fontSize="900" fontFace="Verdana" fontFamily="swiss" fontPitch="variable" spacing="7">

<wd l="2722" t="3965" r="2962" b="4099">0.4</wd>

</ln>

</para>

</dd>

<dd l="2623" t="4131" r="3056" b="4362">

<para l="2656" t="4200" r="3023" b="4334" alignment="left" spaceBefore="26" lsp="exactly" lspExact="191" language="en">

<ln l="2722" t="4200" r="2957" b="4334" baseLine="4325" underlined="none" subsuperscript="none" fontSize="900" fontFace="Verdana" fontFamily="swiss" fontPitch="variable" spacing="6">

<wd l="2722" t="4200" r="2957" b="4334">0.3</wd>

</ln>

</para>

</dd>

<dd l="2623" t="4362" r="3056" b="4592">

<para l="2656" t="4430" r="3023" b="4565" alignment="left" spaceBefore="25" lsp="exactly" lspExact="192" language="en">

<ln l="2722" t="4430" r="2957" b="4565" baseLine="4555" underlined="none" subsuperscript="none" fontSize="900" fontFace="Verdana" fontFamily="swiss" fontPitch="variable" spacing="6">

<wd l="2722" t="4430" r="2957" b="4565">0.2</wd>

</ln>

</para>

</dd>

<dd l="2623" t="4592" r="3056" b="4823">

<para l="2656" t="4661" r="3023" b="4795" alignment="left" spaceBefore="26" lsp="exactly" lspExact="191" language="en">

<ln l="2722" t="4661" r="2957" b="4795" baseLine="4786" underlined="none" subsuperscript="none" fontSize="900" fontFace="Verdana" fontFamily="swiss" fontPitch="variable" spacing="6">

<wd l="2722" t="4661" r="2957" b="4795">0.1</wd>

</ln>

</para>

</dd>

<dd l="2776" t="4826" r="3065" b="5058">

<para l="2809" t="4891" r="3032" b="5026" alignment="left" spaceBefore="27" lsp="exactly" lspExact="201" language="en">

<ln l="2875" t="4891" r="2966" b="5026" baseLine="5021" underlined="none" subsuperscript="none" fontSize="900" fontFace="Verdana" fontFamily="swiss" fontPitch="variable" spacing="0">

<wd l="2875" t="4891" r="2966" b="5026">0</wd>

</ln>

</para>

</dd>

<dd l="2786" t="2507" r="3061" b="2739">

<para l="2819" t="2578" r="3028" b="2707" alignment="left" spaceBefore="27" lsp="exactly" lspExact="202" language="en">

<ln l="2885" t="2578" r="2962" b="2707" baseLine="2702" underlined="none" subsuperscript="none" fontSize="900" fontFace="Verdana" fontFamily="swiss" fontPitch="variable" spacing="0">

<wd l="2885" t="2578" r="2962" b="2707">1</wd>

</ln>

</para>

</dd>

<dd l="8290" t="3304" r="9326" b="4261">

<para l="8290" t="3365" r="9322" b="4234" alignment="left" lsp="exactly" lspExact="314" language="en">

<ln l="8290" t="3365" r="9058" b="3509" baseLine="3499" underlined="none" subsuperscript="none" fontSize="900" fontFace="Verdana" fontFamily="swiss" fontPitch="variable" spacing="-11">

<wd l="8290" t="3365" r="8904" b="3509">Lambda</wd>

<space/>

<wd l="8976" t="3374" r="9058" b="3504">2</wd>

<space/>

</ln>

<ln l="8290" t="3725" r="9058" b="3869" baseLine="3864" underlined="none" subsuperscript="none" fontSize="900" fontFace="Verdana" fontFamily="swiss" fontPitch="variable" spacing="-11">

<wd l="8290" t="3725" r="8904" b="3869">Lambda</wd>

<space/>

<wd l="8981" t="3734" r="9058" b="3864">1</wd>

<space/>

</ln>

<ln l="8290" t="4090" r="9322" b="4234" baseLine="4224" underlined="none" subsuperscript="none" fontSize="900" fontFace="Verdana" fontFamily="swiss" fontPitch="variable" spacing="-11">

<wd l="8290" t="4090" r="9322" b="4234">Performance</wd>

</ln>

</para>

</dd>

<section l="2578" t="5218" r="9538" b="5722">

<column l="2578" t="5218" r="9538" b="5722">

<para l="3605" t="5266" r="8294" b="5467" alignment="centered" spaceBefore="1" spaceAfter="244" lsp="exactly" lspExact="253" language="en">

<ln l="3605" t="5266" r="8294" b="5467" baseLine="5414" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="3605" t="5266" r="4176" b="5467">Figure</wd>

<space/>

<wd l="4258" t="5266" r="4387" b="5424">1:</wd>

<space/>

<wd l="4464" t="5266" r="4800" b="5424">The</wd>

<space/>

<wd l="4858" t="5266" r="5544" b="5467">training</wd>

<space/>

<wd l="5602" t="5266" r="5803" b="5424">of</wd>

<space/>

<wd l="5837" t="5270" r="6509" b="5424">Maxent</wd>

<space/>

<wd l="6566" t="5266" r="6821" b="5424">for</wd>

<space/>

<wd l="6878" t="5266" r="7522" b="5424">lambda</wd>

<space/>

<wd l="7584" t="5266" r="8294" b="5467">settings.</wd>

</ln>

</para>

</column>

</section>

<section l="1291" t="5722" r="10577" b="15413">

<column l="1291" t="5722" r="5813" b="15413">

<para l="1416" t="5770" r="5808" b="8501" alignment="justified" li="72" spaceBefore="6" spaceAfter="244" lsp="exactly" lspExact="253" language="en">

<ln l="1416" t="5770" r="5779" b="5971" baseLine="5923" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="5774" r="1723" b="5928">We</wd>

<space/>

<wd l="1805" t="5770" r="2462" b="5928">divided</wd>

<space/>

<wd l="2534" t="5818" r="2827" b="5928">our</wd>

<space/>

<wd l="2904" t="5770" r="3509" b="5928">dataset</wd>

<space/>

<wd l="3586" t="5770" r="3917" b="5928">into</wd>

<space/>

<wd l="4008" t="5770" r="4258" b="5928">six</wd>

<space/>

<wd l="4334" t="5770" r="4800" b="5971">equal</wd>

<space/>

<wd l="4886" t="5789" r="5203" b="5928">sets</wd>

<space/>

<wd l="5285" t="5770" r="5453" b="5923">in</wd>

<space/>

<wd l="5530" t="5818" r="5779" b="5928">or-</wd>

</ln>

<ln l="1421" t="6024" r="5784" b="6226" baseLine="6173" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="6024" r="1704" b="6182">der</wd>

<space/>

<wd l="1766" t="6043" r="1934" b="6182">to</wd>

<space/>

<wd l="1997" t="6024" r="2712" b="6226">perform</wd>

<space/>

<wd l="2784" t="6024" r="3317" b="6182">6-fold</wd>

<space/>

<wd l="3384" t="6072" r="3830" b="6182">cross</wd>

<space/>

<wd l="3898" t="6024" r="4824" b="6182">validation.</wd>

<space/>

<wd l="4901" t="6024" r="5141" b="6182">As</wd>

<space/>

<wd l="5218" t="6024" r="5784" b="6182">shown</wd>

<space/>

</ln>

<ln l="1421" t="6278" r="5784" b="6480" baseLine="6427" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="6278" r="1589" b="6432">in</wd>

<space/>

<wd l="1694" t="6278" r="2189" b="6437">Table</wd>

<space/>

<wd l="2294" t="6278" r="2448" b="6470">3,</wd>

<space/>

<wd l="2554" t="6278" r="2818" b="6437">the</wd>

<space/>

<wd l="2923" t="6326" r="3600" b="6480">average</wd>

<space/>

<wd l="3706" t="6278" r="3907" b="6437">of</wd>

<space/>

<wd l="3984" t="6278" r="4253" b="6437">the</wd>

<space/>

<wd l="4358" t="6278" r="5117" b="6437">obtained</wd>

<space/>

<wd l="5213" t="6283" r="5784" b="6437">BLEU</wd>

<space/>

</ln>

<ln l="1426" t="6528" r="5779" b="6686" baseLine="6682" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="6576" r="1963" b="6686">scores</wd>

<space/>

<wd l="2064" t="6528" r="2232" b="6682">in</wd>

<space/>

<wd l="2333" t="6528" r="2578" b="6686">six</wd>

<space/>

<wd l="2674" t="6528" r="3586" b="6686">evaluation</wd>

<space/>

<wd l="3677" t="6528" r="4272" b="6686">rounds</wd>

<space/>

<wd l="4368" t="6576" r="4704" b="6686">was</wd>

<space/>

<wd l="4810" t="6528" r="5338" b="6686">83.12.</wd>

<space/>

<wd l="5443" t="6528" r="5779" b="6686">The</wd>

<space/>

</ln>

<ln l="1421" t="6782" r="5784" b="6984" baseLine="6931" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="6782" r="2338" b="6941">evaluation</wd>

<space/>

<wd l="2429" t="6830" r="3019" b="6984">proves</wd>

<space/>

<wd l="3120" t="6782" r="3451" b="6941">that</wd>

<space/>

<wd l="3557" t="6830" r="3850" b="6941">our</wd>

<space/>

<wd l="3950" t="6782" r="4752" b="6984">approach</wd>

<space/>

<wd l="4848" t="6782" r="5410" b="6941">boosts</wd>

<space/>

<wd l="5515" t="6782" r="5784" b="6941">the</wd>

<space/>

</ln>

<ln l="1416" t="7037" r="5774" b="7238" baseLine="7186" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="7042" r="1992" b="7195">BLEU</wd>

<space/>

<wd l="2069" t="7085" r="2525" b="7195">score</wd>

<space/>

<wd l="2592" t="7037" r="2818" b="7238">by</wd>

<space/>

<wd l="2885" t="7037" r="3355" b="7195">41.11</wd>

<space/>

<wd l="3451" t="7037" r="3778" b="7238">(i.e.</wd>

<space/>

<wd l="3864" t="7037" r="4291" b="7195">from</wd>

<space/>

<wd l="4354" t="7037" r="4829" b="7195">42.01</wd>

<space/>

<wd l="4920" t="7056" r="5088" b="7195">to</wd>

<space/>

<wd l="5170" t="7037" r="5774" b="7238">83.12).</wd>

<space/>

</ln>

<ln l="1426" t="7286" r="5779" b="7488" baseLine="7440" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="7286" r="1906" b="7445">Since</wd>

<space/>

<wd l="1968" t="7286" r="2726" b="7488">previous</wd>

<space/>

<wd l="2794" t="7286" r="4013" b="7445">normalization</wd>

<space/>

<wd l="4090" t="7286" r="4685" b="7445">studies</wd>

<space/>

<wd l="4757" t="7286" r="5165" b="7445">used</wd>

<space/>

<wd l="5227" t="7286" r="5779" b="7445">differ-</wd>

</ln>

<ln l="1421" t="7541" r="5784" b="7742" baseLine="7694" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="7560" r="1690" b="7699">ent</wd>

<space/>

<wd l="1800" t="7541" r="2165" b="7699">data</wd>

<space/>

<wd l="2285" t="7589" r="2928" b="7699">sources</wd>

<space/>

<wd l="3048" t="7541" r="3216" b="7694">in</wd>

<space/>

<wd l="3322" t="7541" r="3730" b="7699">their</wd>

<space/>

<wd l="3840" t="7541" r="4958" b="7742">experiments,</wd>

<space/>

<wd l="5078" t="7589" r="5174" b="7699">a</wd>

<space/>

<wd l="5290" t="7541" r="5784" b="7699">direct</wd>

<space/>

</ln>

<ln l="1421" t="7795" r="5789" b="7997" baseLine="7944" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="7795" r="2448" b="7997">comparison</wd>

<space/>

<wd l="2530" t="7795" r="3269" b="7954">between</wd>

<space/>

<wd l="3365" t="7843" r="3653" b="7954">our</wd>

<space/>

<wd l="3749" t="7843" r="4526" b="7997">accuracy</wd>

<space/>

<wd l="4618" t="7795" r="5174" b="7954">values</wd>

<space/>

<wd l="5275" t="7795" r="5410" b="7954">is</wd>

<space/>

<wd l="5506" t="7814" r="5789" b="7954">not</wd>

<space/>

</ln>

<ln l="1416" t="8045" r="5808" b="8246" baseLine="8198" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="8045" r="2462" b="8246">meaningful.</wd>

<space/>

<wd l="2587" t="8045" r="3499" b="8237">Therefore,</wd>

<space/>

<wd l="3610" t="8093" r="3864" b="8203">we</wd>

<space/>

<wd l="3970" t="8045" r="5074" b="8203">re-examined</wd>

<space/>

<wd l="5179" t="8093" r="5491" b="8203">one</wd>

<space/>

<wd l="5606" t="8045" r="5808" b="8203">of</wd>

<space/>

</ln>

<ln l="1416" t="8299" r="5683" b="8501" baseLine="8453" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="8299" r="1685" b="8458">the</wd>

<space/>

<wd l="1752" t="8299" r="3048" b="8458">state-of-the-art</wd>

<space/>

<wd l="3110" t="8299" r="4090" b="8501">approaches</wd>

<space/>

<wd l="4152" t="8299" r="4627" b="8501">using</wd>

<space/>

<wd l="4685" t="8347" r="4978" b="8458">our</wd>

<space/>

<wd l="5035" t="8299" r="5683" b="8458">dataset.</wd>

</ln>

</para>

<rulerline l="1291" t="8774" r="4819" b="8774" type="single" width="34" color="000000"/>

<table l="1291" t="8756" r="4841" b="10843" alignment="left" ri="972" spaceAfter="34">

<bottomBorder type="single" width="34"/>

<gridTable>

<gridCol>2218</gridCol>

<gridCol>1332</gridCol>

<gridRow>297</gridRow>

<gridRow>254</gridRow>

<gridRow>254</gridRow>

<gridRow>255</gridRow>

<gridRow>249</gridRow>

<gridRow>255</gridRow>

<gridRow>249</gridRow>

<gridRow>274</gridRow>

</gridTable>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<bottomBorder type="single" width="14"/>

<para l="1426" t="8837" r="3408" b="9038" alignment="left" li="125" spaceBefore="38" lsp="exactly" lspExact="249" language="en">

<ln l="1426" t="8837" r="3408" b="9038" baseLine="8986" italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="8837" r="1954" b="9038">6-fold</wd>

<space/>

<wd l="2006" t="8890" r="2458" b="8995">cross</wd>

<space/>

<wd l="2525" t="8837" r="3408" b="8995">validation</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<bottomBorder type="single" width="14"/>

<para l="3614" t="8842" r="4709" b="8995" alignment="centered" spaceBefore="38" lsp="exactly" lspExact="249" language="en">

<ln l="3614" t="8842" r="4709" b="8995" baseLine="8986" italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="3614" t="8842" r="4200" b="8995">BLEU</wd>

<space/>

<wd l="4224" t="8890" r="4709" b="8995">score</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="14"/>

<para l="1416" t="9106" r="2146" b="9264" alignment="left" li="125" lsp="exactly" lspExact="248" language="en">

<ln l="1416" t="9106" r="2146" b="9264" baseLine="9254" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1416" t="9106" r="2011" b="9264">Round</wd>

<space/>

<wd l="2083" t="9106" r="2146" b="9259">1</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="middle">

<topBorder type="single" width="14"/>

<para l="3634" t="9106" r="4114" b="9264" alignment="left" lsp="exactly" lspExact="248" language="en">

<tabs position="3634"/>

<ln l="3634" t="9106" r="4114" b="9264" baseLine="9254" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="3634" t="9106" r="4114" b="9264">80.99</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<para l="1416" t="9355" r="2165" b="9514" alignment="left" li="125" lsp="exactly" lspExact="239" language="en">

<ln l="1416" t="9355" r="2165" b="9514" baseLine="9509" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="9355" r="2011" b="9514">Round</wd>

<space/>

<wd l="2064" t="9355" r="2165" b="9509">2</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="middle">

<para l="3634" t="9355" r="4114" b="9514" alignment="left" lsp="exactly" lspExact="239" language="en">

<tabs position="3634"/>

<ln l="3634" t="9355" r="4114" b="9514" baseLine="9509" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="3634" t="9355" r="4114" b="9514">81.57</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<para l="1416" t="9610" r="2155" b="9768" alignment="left" li="125" lsp="exactly" lspExact="243" language="en">

<ln l="1416" t="9610" r="2155" b="9768" baseLine="9763" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="9610" r="2011" b="9768">Round</wd>

<space/>

<wd l="2064" t="9610" r="2155" b="9768">3</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="middle">

<para l="3634" t="9610" r="4118" b="9768" alignment="left" lsp="exactly" lspExact="243" language="en">

<tabs position="3634"/>

<ln l="3634" t="9610" r="4118" b="9768" baseLine="9763" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="3634" t="9610" r="4118" b="9768">84.82</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="middle">

<para l="1416" t="9864" r="2165" b="10022" alignment="left" li="125" lsp="exactly" lspExact="235" language="en">

<ln l="1416" t="9864" r="2165" b="10022" baseLine="10013" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="9864" r="2011" b="10022">Round</wd>

<space/>

<wd l="2059" t="9864" r="2165" b="10018">4</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="middle">

<para l="3634" t="9864" r="4099" b="10022" alignment="left" lsp="exactly" lspExact="235" language="en">

<tabs position="3634"/>

<ln l="3634" t="9864" r="4099" b="10022" baseLine="10013" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="3634" t="9864" r="4099" b="10022">83.91</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="5" gridRowTill="5" alignment="left" verticalAlignment="middle">

<para l="1416" t="10114" r="2155" b="10272" alignment="left" li="125" lsp="exactly" lspExact="243" language="en">

<ln l="1416" t="10114" r="2155" b="10272" baseLine="10267" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="10114" r="2011" b="10272">Round</wd>

<space/>

<wd l="2069" t="10118" r="2155" b="10272">5</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="5" gridRowTill="5" alignment="decimal" verticalAlignment="middle">

<para l="3634" t="10114" r="4118" b="10272" alignment="left" lsp="exactly" lspExact="243" language="en">

<tabs position="3634"/>

<ln l="3634" t="10114" r="4118" b="10272" baseLine="10267" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="3634" t="10114" r="4118" b="10272">83.90</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="6" gridRowTill="6" alignment="left" verticalAlignment="middle">

<para l="1416" t="10368" r="2165" b="10526" alignment="left" li="125" lsp="exactly" lspExact="235" language="en">

<ln l="1416" t="10368" r="2165" b="10526" baseLine="10522" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="10368" r="2011" b="10526">Round</wd>

<space/>

<wd l="2069" t="10368" r="2165" b="10526">6</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="6" gridRowTill="6" alignment="decimal" verticalAlignment="middle">

<para l="3634" t="10368" r="4109" b="10526" alignment="left" lsp="exactly" lspExact="235" language="en">

<tabs position="3634"/>

<ln l="3634" t="10368" r="4109" b="10526" baseLine="10522" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="3634" t="10368" r="4109" b="10526">83.55</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="7" gridRowTill="7" alignment="left" verticalAlignment="middle">

<bottomBorder type="single" width="34"/>

<para l="1416" t="10622" r="2160" b="10824" alignment="left" li="125" lsp="exactly" lspExact="252" language="en">

<ln l="1416" t="10622" r="2160" b="10824" baseLine="10771" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="10622" r="2160" b="10824">Average</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="7" gridRowTill="7" alignment="decimal" verticalAlignment="middle">

<bottomBorder type="single" width="34"/>

<para l="3634" t="10622" r="4118" b="10781" alignment="left" lsp="exactly" lspExact="252" language="en">

<tabs position="3634"/>

<ln l="3634" t="10622" r="4118" b="10781" baseLine="10771" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="3634" t="10622" r="4118" b="10781">83.12</wd>

</ln>

</para>

</cell>

</table>

<para l="1416" t="10906" r="5779" b="11318" alignment="justified" li="72" lsp="exactly" lspExact="253" language="en">

<ln l="1421" t="10906" r="5779" b="11064" baseLine="11054" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="10906" r="1920" b="11064">Table</wd>

<space/>

<wd l="2026" t="10906" r="2174" b="11064">3:</wd>

<space/>

<wd l="2290" t="10906" r="3562" b="11064">Normalization</wd>

<space/>

<wd l="3662" t="10906" r="4234" b="11064">results</wd>

<space/>

<wd l="4339" t="10906" r="4598" b="11064">for</wd>

<space/>

<wd l="4704" t="10906" r="5237" b="11064">6-fold</wd>

<space/>

<wd l="5338" t="10954" r="5779" b="11064">cross</wd>

<space/>

</ln>

<ln l="1416" t="11160" r="2707" b="11318" baseLine="11309" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="11160" r="2299" b="11318">validation</wd>

<space/>

<wd l="2352" t="11179" r="2707" b="11318">test.</wd>

</ln>

</para>

<para l="1416" t="11664" r="5789" b="15365" alignment="justified" li="72" spaceBefore="251" lsp="exactly" lspExact="252" language="en">

<ln l="1421" t="11664" r="5789" b="11866" baseLine="11813" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="2">

<wd l="1421" t="11664" r="1762" b="11822">The</wd>

<space/>

<wd l="1882" t="11664" r="2702" b="11822">statistical</wd>

<space/>

<wd l="2818" t="11664" r="3562" b="11822">machine</wd>

<space/>

<wd l="3677" t="11664" r="4608" b="11822">translation</wd>

<space/>

<wd l="4723" t="11664" r="5309" b="11866">(SMT)</wd>

<space/>

<wd l="5434" t="11664" r="5573" b="11822">is</wd>

<space/>

<wd l="5693" t="11712" r="5789" b="11822">a</wd>

<space/>

</ln>

<ln l="1421" t="11918" r="5779" b="12120" baseLine="12067" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="2">

<wd l="1421" t="11918" r="2515" b="12120">cutting-edge</wd>

<space/>

<wd l="2606" t="11918" r="3413" b="12120">approach</wd>

<space/>

<wd l="3494" t="11918" r="3826" b="12077">that</wd>

<space/>

<wd l="3912" t="11918" r="4579" b="12077">handles</wd>

<space/>

<wd l="4670" t="11918" r="4939" b="12077">the</wd>

<space/>

<wd l="5026" t="11918" r="5779" b="12077">normali-</wd>

</ln>

<ln l="1416" t="12168" r="5779" b="12370" baseLine="12322" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="2">

<wd l="1416" t="12168" r="1958" b="12326">zation</wd>

<space/>

<wd l="2050" t="12168" r="2789" b="12370">problem</wd>

<space/>

<wd l="2885" t="12216" r="3058" b="12326">as</wd>

<space/>

<wd l="3163" t="12216" r="3259" b="12326">a</wd>

<space/>

<wd l="3365" t="12168" r="4181" b="12326">statistical</wd>

<space/>

<wd l="4286" t="12168" r="5030" b="12326">machine</wd>

<space/>

<wd l="5126" t="12168" r="5779" b="12326">transla-</wd>

</ln>

<ln l="1416" t="12422" r="5774" b="12624" baseLine="12576" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="2">

<wd l="1416" t="12422" r="1762" b="12581">tion</wd>

<space/>

<wd l="1838" t="12422" r="2242" b="12614">task;</wd>

<space/>

<wd l="2342" t="12422" r="2458" b="12581">it</wd>

<space/>

<wd l="2539" t="12470" r="2875" b="12581">was</wd>

<space/>

<wd l="2966" t="12422" r="3317" b="12581">first</wd>

<space/>

<wd l="3398" t="12422" r="4339" b="12581">introduced</wd>

<space/>

<wd l="4411" t="12422" r="4637" b="12624">by</wd>

<space/>

<wd l="4714" t="12422" r="5078" b="12614">Aw,</wd>

<space/>

<wd l="5170" t="12422" r="5774" b="12624">Zhang,</wd>

<space/>

</ln>

<ln l="1416" t="12677" r="5784" b="12878" baseLine="12826" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="2">

<wd l="1416" t="12677" r="1896" b="12869">Xiao,</wd>

<space/>

<wd l="2021" t="12677" r="2338" b="12835">and</wd>

<space/>

<wd l="2458" t="12677" r="2683" b="12835">Su</wd>

<space/>

<wd l="2798" t="12677" r="3427" b="12878">(2006).</wd>

<space/>

<wd l="3557" t="12677" r="3888" b="12835">The</wd>

<space/>

<wd l="4018" t="12677" r="4858" b="12835">SMT-like</wd>

<space/>

<wd l="4982" t="12677" r="5784" b="12878">approach</wd>

<space/>

</ln>

<ln l="1416" t="12926" r="5789" b="13128" baseLine="13080" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="2">

<wd l="1416" t="12926" r="2246" b="13085">translates</wd>

<space/>

<wd l="2352" t="12974" r="2448" b="13085">a</wd>

<space/>

<wd l="2554" t="12974" r="3115" b="13085">source</wd>

<space/>

<wd l="3221" t="12926" r="4003" b="13128">language</wd>

<space/>

<wd l="4109" t="12926" r="4709" b="13128">(UGC)</wd>

<space/>

<wd l="4814" t="12946" r="4982" b="13085">to</wd>

<space/>

<wd l="5093" t="12974" r="5189" b="13085">a</wd>

<space/>

<wd l="5285" t="12946" r="5789" b="13128">target</wd>

<space/>

</ln>

<ln l="1421" t="13181" r="5784" b="13382" baseLine="13334" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="2">

<wd l="1421" t="13181" r="2208" b="13382">language</wd>

<space/>

<wd l="2342" t="13181" r="3163" b="13382">(standard</wd>

<space/>

<wd l="3283" t="13181" r="4190" b="13382">language).</wd>

<space/>

<wd l="4330" t="13181" r="4666" b="13339">The</wd>

<space/>

<wd l="4800" t="13181" r="5784" b="13382">experiment</wd>

<space/>

</ln>

<ln l="1416" t="13435" r="5779" b="13637" baseLine="13584" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="2">

<wd l="1416" t="13483" r="1757" b="13594">was</wd>

<space/>

<wd l="1819" t="13435" r="2746" b="13637">performed</wd>

<space/>

<wd l="2808" t="13435" r="3283" b="13637">using</wd>

<space/>

<wd l="3346" t="13440" r="3917" b="13594">Moses</wd>

<space/>

<wd l="3994" t="13435" r="4651" b="13637">(Koehn</wd>

<space/>

<wd l="4718" t="13454" r="4872" b="13594">et</wd>

<space/>

<wd l="4944" t="13435" r="5198" b="13627">al.,</wd>

<space/>

<wd l="5275" t="13435" r="5779" b="13637">2007)</wd>

<space/>

</ln>

<ln l="1421" t="13690" r="5779" b="13891" baseLine="13838" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="2">

<wd l="1421" t="13690" r="1680" b="13848">for</wd>

<space/>

<wd l="1786" t="13690" r="2602" b="13848">statistical</wd>

<space/>

<wd l="2707" t="13690" r="3682" b="13882">translation,</wd>

<space/>

<wd l="3792" t="13690" r="4454" b="13848">Giza++</wd>

<space/>

<wd l="4560" t="13690" r="4992" b="13891">(Och</wd>

<space/>

<wd l="5098" t="13690" r="5261" b="13848">&amp;</wd>

<space/>

<wd l="5362" t="13694" r="5779" b="13891">Ney,</wd>

<space/>

</ln>

<ln l="1421" t="13939" r="5774" b="14141" baseLine="14093" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="2">

<wd l="1421" t="13939" r="1925" b="14141">2003)</wd>

<space/>

<wd l="2006" t="13939" r="2266" b="14098">for</wd>

<space/>

<wd l="2333" t="13939" r="2789" b="14098">word</wd>

<space/>

<wd l="2861" t="13939" r="3782" b="14141">alignment,</wd>

<space/>

<wd l="3864" t="13939" r="4186" b="14098">and</wd>

<space/>

<wd l="4262" t="13939" r="4915" b="14098">SRILM</wd>

<space/>

<wd l="4997" t="13939" r="5774" b="14141">(Stolcke,</wd>

<space/>

</ln>

<ln l="1421" t="14194" r="5779" b="14395" baseLine="14342" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="2">

<wd l="1421" t="14194" r="1925" b="14395">2002)</wd>

<space/>

<wd l="2045" t="14194" r="2299" b="14352">for</wd>

<space/>

<wd l="2410" t="14198" r="2731" b="14347">LM</wd>

<space/>

<wd l="2851" t="14194" r="3782" b="14395">compiling.</wd>

<space/>

<wd l="3907" t="14194" r="4243" b="14352">The</wd>

<space/>

<wd l="4368" t="14194" r="4800" b="14352">SMT</wd>

<space/>

<wd l="4925" t="14213" r="5530" b="14395">system</wd>

<space/>

<wd l="5640" t="14194" r="5779" b="14352">is</wd>

<space/>

</ln>

<ln l="1416" t="14448" r="5784" b="14650" baseLine="14597" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="2">

<wd l="1416" t="14448" r="2035" b="14606">trained</wd>

<space/>

<wd l="2146" t="14448" r="2621" b="14650">using</wd>

<space/>

<wd l="2736" t="14496" r="3029" b="14606">our</wd>

<space/>

<wd l="3144" t="14448" r="3792" b="14606">Twitter</wd>

<space/>

<wd l="3902" t="14448" r="4555" b="14650">aligned</wd>

<space/>

<wd l="4666" t="14448" r="5318" b="14606">dataset.</wd>

<space/>

<wd l="5443" t="14448" r="5784" b="14606">The</wd>

<space/>

</ln>

<ln l="1421" t="14698" r="5789" b="14899" baseLine="14851" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="2">

<wd l="1421" t="14698" r="2213" b="14899">optimum</wd>

<space/>

<wd l="2304" t="14698" r="2880" b="14856">results</wd>

<space/>

<wd l="2976" t="14746" r="3403" b="14856">were</wd>

<space/>

<wd l="3504" t="14698" r="4282" b="14856">achieved</wd>

<space/>

<wd l="4373" t="14698" r="4853" b="14899">using</wd>

<space/>

<wd l="4949" t="14746" r="5045" b="14856">a</wd>

<space/>

<wd l="5136" t="14698" r="5789" b="14899">trigram</wd>

<space/>

</ln>

<ln l="1416" t="14952" r="5765" b="15154" baseLine="15106" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="2">

<wd l="1416" t="14957" r="1742" b="15106">LM</wd>

<space/>

<wd l="1906" t="14952" r="2222" b="15110">and</wd>

<space/>

<wd l="2371" t="14952" r="3101" b="15110">Backoff</wd>

<space/>

<wd l="3235" t="14952" r="4157" b="15154">smoothing</wd>

<space/>

<wd l="4310" t="14952" r="5050" b="15154">(Jelinek,</wd>

<space/>

<wd l="5232" t="14952" r="5765" b="15154">1990):</wd>

<space/>

</ln>

<ln l="1421" t="15206" r="3106" b="15365" baseLine="15355" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="2">

<wd l="1421" t="15206" r="1891" b="15365">78.81</wd>

<space/>

<wd l="1968" t="15211" r="2544" b="15365">BLEU</wd>

<space/>

<wd l="2606" t="15254" r="3106" b="15365">score.</wd>

</ln>

</para>

</column>

<column l="6055" t="5722" r="10577" b="15413">

<para l="6120" t="5770" r="10512" b="9509" alignment="justified" li="72" ri="72" spaceBefore="2" spaceAfter="244" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="6355" t="5770" r="10493" b="5928" baseLine="5918" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6355" t="5770" r="6850" b="5928">Table</wd>

<space/>

<wd l="6974" t="5770" r="7080" b="5923">4</wd>

<space/>

<wd l="7214" t="5770" r="7987" b="5928">indicates</wd>

<space/>

<wd l="8122" t="5818" r="8573" b="5928">some</wd>

<space/>

<wd l="8712" t="5770" r="9456" b="5928">statistics</wd>

<space/>

<wd l="9586" t="5770" r="10070" b="5928">about</wd>

<space/>

<wd l="10200" t="5818" r="10493" b="5928">our</wd>

<space/>

</ln>

<ln l="6120" t="6024" r="10483" b="6226" baseLine="6173" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6120" t="6024" r="6710" b="6226">testing</wd>

<space/>

<wd l="6773" t="6024" r="7426" b="6182">dataset.</wd>

<space/>

<wd l="7498" t="6024" r="7838" b="6182">The</wd>

<space/>

<wd l="7906" t="6024" r="8376" b="6182">OOV</wd>

<space/>

<wd l="8434" t="6024" r="8971" b="6182">words</wd>

<space/>

<wd l="9038" t="6072" r="9302" b="6182">are</wd>

<space/>

<wd l="9365" t="6024" r="9826" b="6182">those</wd>

<space/>

<wd l="9893" t="6024" r="10483" b="6182">detect-</wd>

</ln>

<ln l="6125" t="6278" r="10488" b="6480" baseLine="6427" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="6278" r="6336" b="6437">ed</wd>

<space/>

<wd l="6432" t="6278" r="6658" b="6480">by</wd>

<space/>

<wd l="6763" t="6326" r="7056" b="6437">our</wd>

<space/>

<wd l="7162" t="6278" r="7632" b="6437">OOV</wd>

<space/>

<wd l="7742" t="6278" r="8544" b="6437">detection</wd>

<space/>

<wd l="8650" t="6278" r="9350" b="6437">module.</wd>

<space/>

<wd l="9470" t="6278" r="9806" b="6437">The</wd>

<space/>

<wd l="9917" t="6283" r="10488" b="6437">BLEU</wd>

<space/>

</ln>

<ln l="6130" t="6528" r="10483" b="6730" baseLine="6677" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="6576" r="6586" b="6686">score</wd>

<space/>

<wd l="6653" t="6528" r="6854" b="6686">of</wd>

<space/>

<wd l="6898" t="6576" r="7224" b="6686">raw</wd>

<space/>

<wd l="7286" t="6547" r="7618" b="6686">text</wd>

<space/>

<wd l="7685" t="6528" r="7824" b="6686">is</wd>

<space/>

<wd l="7896" t="6576" r="8093" b="6686">an</wd>

<space/>

<wd l="8165" t="6528" r="9019" b="6730">important</wd>

<space/>

<wd l="9082" t="6576" r="9811" b="6686">measure</wd>

<space/>

<wd l="9878" t="6547" r="10046" b="6686">to</wd>

<space/>

<wd l="10118" t="6576" r="10483" b="6686">ana-</wd>

</ln>

<ln l="6125" t="6782" r="10483" b="6984" baseLine="6931" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="6782" r="6485" b="6984">lyze</wd>

<space/>

<wd l="6566" t="6782" r="6835" b="6941">the</wd>

<space/>

<wd l="6926" t="6782" r="7742" b="6984">difficulty</wd>

<space/>

<wd l="7829" t="6782" r="8030" b="6941">of</wd>

<space/>

<wd l="8088" t="6782" r="8357" b="6941">the</wd>

<space/>

<wd l="8438" t="6782" r="8837" b="6941">task.</wd>

<space/>

<wd l="8938" t="6787" r="9062" b="6941">It</wd>

<space/>

<wd l="9149" t="6782" r="9288" b="6941">is</wd>

<space/>

<wd l="9379" t="6782" r="10238" b="6984">important</wd>

<space/>

<wd l="10320" t="6802" r="10483" b="6941">to</wd>

<space/>

</ln>

<ln l="6120" t="7037" r="10483" b="7238" baseLine="7186" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6120" t="7056" r="6499" b="7195">note</wd>

<space/>

<wd l="6566" t="7037" r="6898" b="7195">that</wd>

<space/>

<wd l="6965" t="7037" r="7229" b="7195">the</wd>

<space/>

<wd l="7306" t="7037" r="7910" b="7195">dataset</wd>

<space/>

<wd l="7978" t="7037" r="8386" b="7195">used</wd>

<space/>

<wd l="8448" t="7037" r="8616" b="7190">in</wd>

<space/>

<wd l="8688" t="7085" r="8981" b="7195">our</wd>

<space/>

<wd l="9048" t="7037" r="10032" b="7238">experiment</wd>

<space/>

<wd l="10104" t="7085" r="10483" b="7195">con-</wd>

</ln>

<ln l="6120" t="7286" r="10483" b="7488" baseLine="7435" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6120" t="7286" r="6533" b="7445">tains</wd>

<space/>

<wd l="6638" t="7334" r="6835" b="7445">an</wd>

<space/>

<wd l="6941" t="7286" r="7459" b="7445">above</wd>

<space/>

<wd l="7560" t="7334" r="8237" b="7488">average</wd>

<space/>

<wd l="8333" t="7286" r="9005" b="7445">number</wd>

<space/>

<wd l="9106" t="7286" r="9307" b="7445">of</wd>

<space/>

<wd l="9384" t="7286" r="9854" b="7445">OOV</wd>

<space/>

<wd l="9950" t="7286" r="10483" b="7445">words</wd>

<space/>

</ln>

<ln l="6125" t="7541" r="10478" b="7742" baseLine="7690" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="7541" r="6994" b="7742">compared</wd>

<space/>

<wd l="7061" t="7560" r="7229" b="7699">to</wd>

<space/>

<wd l="7306" t="7541" r="7574" b="7699">the</wd>

<space/>

<wd l="7656" t="7541" r="8342" b="7699">datasets</wd>

<space/>

<wd l="8424" t="7541" r="8587" b="7694">in</wd>

<space/>

<wd l="8664" t="7541" r="9120" b="7699">other</wd>

<space/>

<wd l="9187" t="7541" r="9792" b="7699">related</wd>

<space/>

<wd l="9854" t="7589" r="10478" b="7742">papers.</wd>

<space/>

</ln>

<ln l="6125" t="7795" r="10483" b="7997" baseLine="7944" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="7795" r="6466" b="7954">The</wd>

<space/>

<wd l="6538" t="7795" r="7147" b="7954">dataset</wd>

<space/>

<wd l="7214" t="7795" r="7622" b="7954">used</wd>

<space/>

<wd l="7685" t="7795" r="7910" b="7997">by</wd>

<space/>

<wd l="7978" t="7795" r="8549" b="7954">Kobus</wd>

<space/>

<wd l="8626" t="7814" r="8784" b="7954">et</wd>

<space/>

<wd l="8856" t="7795" r="9053" b="7954">al.</wd>

<space/>

<wd l="9139" t="7795" r="9715" b="7997">(2008)</wd>

<space/>

<wd l="9797" t="7795" r="10483" b="7954">consists</wd>

<space/>

</ln>

<ln l="6125" t="8045" r="10483" b="8246" baseLine="8194" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="8045" r="6326" b="8203">of</wd>

<space/>

<wd l="6370" t="8045" r="6763" b="8208">32%</wd>

<space/>

<wd l="6835" t="8045" r="7306" b="8203">OOV</wd>

<space/>

<wd l="7368" t="8045" r="7954" b="8237">words,</wd>

<space/>

<wd l="8021" t="8045" r="8558" b="8203">which</wd>

<space/>

<wd l="8621" t="8045" r="8760" b="8203">is</wd>

<space/>

<wd l="8837" t="8045" r="9490" b="8246">slightly</wd>

<space/>

<wd l="9552" t="8045" r="10051" b="8203">lower</wd>

<space/>

<wd l="10109" t="8045" r="10483" b="8203">than</wd>

<space/>

</ln>

<ln l="6125" t="8299" r="10483" b="8501" baseLine="8448" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="8299" r="6518" b="8462">34%</wd>

<space/>

<wd l="6595" t="8299" r="6797" b="8458">of</wd>

<space/>

<wd l="6845" t="8347" r="7138" b="8458">our</wd>

<space/>

<wd l="7200" t="8299" r="7853" b="8458">dataset.</wd>

<space/>

<wd l="7934" t="8304" r="8107" b="8453">In</wd>

<space/>

<wd l="8179" t="8299" r="8947" b="8491">addition,</wd>

<space/>

<wd l="9019" t="8299" r="9336" b="8458">Aw</wd>

<space/>

<wd l="9403" t="8318" r="9557" b="8458">et</wd>

<space/>

<wd l="9629" t="8299" r="9826" b="8458">al.</wd>

<space/>

<wd l="9907" t="8299" r="10483" b="8501">(2006)</wd>

<space/>

</ln>

<ln l="6120" t="8549" r="10512" b="8707" baseLine="8702" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6120" t="8549" r="6533" b="8707">used</wd>

<space/>

<wd l="6634" t="8597" r="6730" b="8707">a</wd>

<space/>

<wd l="6835" t="8549" r="7440" b="8707">dataset</wd>

<space/>

<wd l="7546" t="8549" r="7934" b="8707">with</wd>

<space/>

<wd l="8040" t="8597" r="8136" b="8707">a</wd>

<space/>

<wd l="8232" t="8549" r="8957" b="8707">baseline</wd>

<space/>

<wd l="9062" t="8554" r="9634" b="8707">BLEU</wd>

<space/>

<wd l="9749" t="8597" r="10200" b="8707">score</wd>

<space/>

<wd l="10310" t="8549" r="10512" b="8707">of</wd>

<space/>

</ln>

<ln l="6130" t="8803" r="10488" b="8995" baseLine="8957" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="8803" r="6667" b="8995">57.84,</wd>

<space/>

<wd l="6749" t="8803" r="7286" b="8962">which</wd>

<space/>

<wd l="7363" t="8803" r="8141" b="8962">indicates</wd>

<space/>

<wd l="8218" t="8803" r="8549" b="8962">that</wd>

<space/>

<wd l="8621" t="8803" r="8890" b="8962">the</wd>

<space/>

<wd l="8966" t="8851" r="9298" b="8962">raw</wd>

<space/>

<wd l="9374" t="8822" r="9706" b="8962">text</wd>

<space/>

<wd l="9782" t="8803" r="9922" b="8962">is</wd>

<space/>

<wd l="9998" t="8803" r="10488" b="8962">much</wd>

<space/>

</ln>

<ln l="6120" t="9058" r="10483" b="9259" baseLine="9206" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6120" t="9106" r="6571" b="9216">more</wd>

<space/>

<wd l="6648" t="9058" r="7258" b="9216">similar</wd>

<space/>

<wd l="7320" t="9077" r="7488" b="9216">to</wd>

<space/>

<wd l="7555" t="9058" r="7824" b="9216">the</wd>

<space/>

<wd l="7896" t="9058" r="8539" b="9216">manual</wd>

<space/>

<wd l="8606" t="9058" r="9470" b="9216">translated</wd>

<space/>

<wd l="9533" t="9077" r="9864" b="9216">text</wd>

<space/>

<wd l="9931" t="9058" r="10483" b="9259">(refer-</wd>

</ln>

<ln l="6125" t="9307" r="10320" b="9509" baseLine="9461" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="9355" r="6523" b="9466">ence</wd>

<space/>

<wd l="6581" t="9307" r="6979" b="9509">text)</wd>

<space/>

<wd l="7037" t="9307" r="7421" b="9466">than</wd>

<space/>

<wd l="7474" t="9307" r="7738" b="9466">the</wd>

<space/>

<wd l="7800" t="9355" r="8194" b="9466">ones</wd>

<space/>

<wd l="8256" t="9307" r="8664" b="9466">used</wd>

<space/>

<wd l="8717" t="9307" r="8885" b="9461">in</wd>

<space/>

<wd l="8947" t="9355" r="9235" b="9466">our</wd>

<space/>

<wd l="9293" t="9307" r="10320" b="9509">experiment.</wd>

</ln>

</para>

<rulerline l="6307" t="9782" r="10301" b="9782" type="single" width="34" color="000000"/>

<table l="6271" t="9764" r="10325" b="11078" alignment="left" li="216" ri="252" spaceAfter="34">

<bottomBorder type="single" width="34"/>

<gridTable>

<gridCol>3132</gridCol>

<gridCol>922</gridCol>

<gridRow>287</gridRow>

<gridRow>255</gridRow>

<gridRow>254</gridRow>

<gridRow>250</gridRow>

<gridRow>268</gridRow>

</gridTable>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<para l="6418" t="9850" r="9293" b="10051" alignment="left" li="147" spaceBefore="34" lsp="exactly" lspExact="238" language="en">

<ln l="6418" t="9850" r="9293" b="10051" baseLine="9998" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6418" t="9850" r="6840" b="10051">Avg.</wd>

<space/>

<wd l="6912" t="9850" r="7459" b="10051">length</wd>

<space/>

<wd l="7517" t="9850" r="7718" b="10008">of</wd>

<space/>

<wd l="7752" t="9850" r="8285" b="10008">words</wd>

<space/>

<wd l="8352" t="9850" r="9293" b="10051">(character)</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<para l="9523" t="9854" r="9610" b="10008" alignment="left" li="111" spaceBefore="34" lsp="exactly" lspExact="238" language="en">

<ln l="9523" t="9854" r="9610" b="10008" baseLine="9998" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="9523" t="9854" r="9610" b="10008">5</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<para l="6418" t="10099" r="8410" b="10301" alignment="left" li="147" lsp="exactly" lspExact="243" language="en">

<ln l="6418" t="10099" r="8410" b="10301" baseLine="10253" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6418" t="10099" r="6840" b="10301">Avg.</wd>

<space/>

<wd l="6907" t="10099" r="7584" b="10258">number</wd>

<space/>

<wd l="7642" t="10099" r="7843" b="10258">of</wd>

<space/>

<wd l="7877" t="10099" r="8410" b="10258">words</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<para l="9538" t="10099" r="9710" b="10253" alignment="left" li="111" lsp="exactly" lspExact="243" language="en">

<ln l="9538" t="10099" r="9710" b="10253" baseLine="10253" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-16">

<wd l="9538" t="10099" r="9710" b="10253">11</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<para l="6422" t="10354" r="8126" b="10512" alignment="left" li="147" lsp="exactly" lspExact="248" language="en">

<ln l="6422" t="10354" r="8126" b="10512" baseLine="10507" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6422" t="10354" r="6878" b="10512">Total</wd>

<space/>

<wd l="6936" t="10358" r="7253" b="10512">No.</wd>

<space/>

<wd l="7325" t="10354" r="7526" b="10512">of</wd>

<space/>

<wd l="7555" t="10354" r="8126" b="10512">tokens</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<para l="9538" t="10354" r="10109" b="10546" alignment="left" li="111" lsp="exactly" lspExact="248" language="en">

<ln l="9538" t="10354" r="10109" b="10546" baseLine="10507" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="9538" t="10354" r="10109" b="10546">19,759</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<para l="6422" t="10608" r="7488" b="10766" alignment="left" li="147" lsp="exactly" lspExact="240" language="en">

<ln l="6422" t="10608" r="7488" b="10766" baseLine="10757" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6422" t="10608" r="6893" b="10766">OOV</wd>

<space/>

<wd l="6950" t="10608" r="7488" b="10766">words</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<para l="9518" t="10608" r="10186" b="10771" alignment="left" li="111" lsp="exactly" lspExact="240" language="en">

<ln l="9518" t="10608" r="10186" b="10771" baseLine="10757" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="9518" t="10608" r="10186" b="10771">34.02%</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="middle">

<bottomBorder type="single" width="34"/>

<para l="6418" t="10858" r="8525" b="11016" alignment="left" li="147" spaceAfter="4" lsp="exactly" lspExact="253" language="en">

<ln l="6418" t="10858" r="8525" b="11016" baseLine="11011" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6418" t="10862" r="6994" b="11016">BLEU</wd>

<space/>

<wd l="7056" t="10906" r="7512" b="11016">score</wd>

<space/>

<wd l="7574" t="10858" r="7776" b="11016">of</wd>

<space/>

<wd l="7805" t="10906" r="8136" b="11016">raw</wd>

<space/>

<wd l="8194" t="10877" r="8525" b="11016">text</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="middle">

<bottomBorder type="single" width="34"/>

<para l="9514" t="10858" r="9984" b="11016" alignment="left" li="111" spaceAfter="4" lsp="exactly" lspExact="253" language="en">

<ln l="9514" t="10858" r="9984" b="11016" baseLine="11011" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="9514" t="10858" r="9984" b="11016">42.01</wd>

</ln>

</para>

</cell>

</table>

<para l="6125" t="11141" r="9293" b="11342" alignment="left" li="72" ri="72" lsp="exactly" lspExact="253" language="en">

<ln l="6125" t="11141" r="9293" b="11342" baseLine="11294" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="11141" r="6624" b="11299">Table</wd>

<space/>

<wd l="6677" t="11141" r="6830" b="11299">4:</wd>

<space/>

<wd l="6917" t="11141" r="7699" b="11299">Statistics</wd>

<space/>

<wd l="7762" t="11141" r="7963" b="11299">of</wd>

<space/>

<wd l="7997" t="11141" r="8582" b="11342">testing</wd>

<space/>

<wd l="8640" t="11141" r="9293" b="11299">dataset.</wd>

</ln>

</para>

<para l="6115" t="11650" r="10512" b="15384" alignment="justified" li="72" ri="72" spaceBefore="252" spaceAfter="9" lsp="exactly" lspExact="253" language="en">

<ln l="6120" t="11650" r="10512" b="11851" baseLine="11798" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6120" t="11650" r="6360" b="11808">As</wd>

<space/>

<wd l="6504" t="11650" r="7070" b="11808">shown</wd>

<space/>

<wd l="7200" t="11650" r="7368" b="11803">in</wd>

<space/>

<wd l="7498" t="11650" r="7997" b="11808">Table</wd>

<space/>

<wd l="8126" t="11650" r="8285" b="11842">4,</wd>

<space/>

<wd l="8419" t="11650" r="8683" b="11808">the</wd>

<space/>

<wd l="8818" t="11698" r="9494" b="11851">average</wd>

<space/>

<wd l="9629" t="11650" r="10176" b="11851">length</wd>

<space/>

<wd l="10310" t="11650" r="10512" b="11808">of</wd>

<space/>

</ln>

<ln l="6120" t="11904" r="10483" b="12096" baseLine="12053" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6120" t="11904" r="6658" b="12062">words</wd>

<space/>

<wd l="6758" t="11904" r="6898" b="12062">is</wd>

<space/>

<wd l="6998" t="11904" r="7334" b="12062">five</wd>

<space/>

<wd l="7435" t="11904" r="8371" b="12096">characters,</wd>

<space/>

<wd l="8472" t="11904" r="9014" b="12062">which</wd>

<space/>

<wd l="9106" t="11904" r="9662" b="12062">makes</wd>

<space/>

<wd l="9758" t="11904" r="10027" b="12062">the</wd>

<space/>

<wd l="10123" t="11952" r="10483" b="12062">nor-</wd>

</ln>

<ln l="6120" t="12154" r="10488" b="12355" baseLine="12307" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6120" t="12154" r="7051" b="12312">malization</wd>

<space/>

<wd l="7133" t="12154" r="7493" b="12312">task</wd>

<space/>

<wd l="7570" t="12202" r="8016" b="12312">more</wd>

<space/>

<wd l="8107" t="12154" r="8856" b="12312">difficult.</wd>

<space/>

<wd l="8947" t="12158" r="9259" b="12312">For</wd>

<space/>

<wd l="9341" t="12154" r="10128" b="12355">example,</wd>

<space/>

<wd l="10219" t="12154" r="10488" b="12312">the</wd>

<space/>

</ln>

<ln l="6125" t="12408" r="10483" b="12610" baseLine="12557">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="6125" t="12408" r="6965" b="12566">candidate</wd>

<space/>

<wd l="7046" t="12427" r="7282" b="12566">set</wd>

<space/>

<wd l="7354" t="12408" r="7613" b="12566">for</wd>

<space/>

<wd l="7675" t="12408" r="7944" b="12566">the</wd>

<space/>

<wd l="8021" t="12408" r="8491" b="12566">OOV</wd>

<space/>

<wd l="8563" t="12408" r="9019" b="12566">word</wd>

<space/>

</run>

<wd l="9091" t="12408" r="9682" b="12610"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">“</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">yoor</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">”</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="9758" t="12408" r="10483" b="12566">contains</wd>

<space/>

</run>

</ln>

<ln l="6130" t="12662" r="10488" b="12864" baseLine="12811" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="12662" r="6331" b="12821">59</wd>

<space/>

<wd l="6398" t="12662" r="6984" b="12854">words,</wd>

<space/>

<wd l="7051" t="12710" r="7229" b="12821">as</wd>

<space/>

<wd l="7296" t="12662" r="7862" b="12821">shown</wd>

<space/>

<wd l="7920" t="12662" r="8088" b="12816">in</wd>

<space/>

<wd l="8146" t="12662" r="8640" b="12821">Table</wd>

<space/>

<wd l="8707" t="12667" r="8851" b="12821">5.</wd>

<space/>

<wd l="8923" t="12662" r="9264" b="12821">The</wd>

<space/>

<wd l="9322" t="12662" r="9758" b="12864">large</wd>

<space/>

<wd l="9816" t="12662" r="10488" b="12821">number</wd>

<space/>

</ln>

<ln l="6125" t="12912" r="10483" b="13114" baseLine="13066" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="12912" r="6326" b="13070">of</wd>

<space/>

<wd l="6394" t="12912" r="7315" b="13070">candidates</wd>

<space/>

<wd l="7406" t="12960" r="7973" b="13070">causes</wd>

<space/>

<wd l="8064" t="12912" r="8880" b="13114">difficulty</wd>

<space/>

<wd l="8966" t="12912" r="9226" b="13070">for</wd>

<space/>

<wd l="9307" t="12912" r="10147" b="13070">candidate</wd>

<space/>

<wd l="10243" t="12960" r="10483" b="13070">se-</wd>

</ln>

<ln l="6125" t="13166" r="10483" b="13368" baseLine="13320" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="13166" r="6720" b="13325">lection</wd>

<space/>

<wd l="6782" t="13166" r="7483" b="13325">because</wd>

<space/>

<wd l="7550" t="13214" r="7997" b="13325">more</wd>

<space/>

<wd l="8069" t="13166" r="8707" b="13368">options</wd>

<space/>

<wd l="8784" t="13166" r="9149" b="13325">lead</wd>

<space/>

<wd l="9211" t="13186" r="9379" b="13325">to</wd>

<space/>

<wd l="9446" t="13214" r="9898" b="13325">more</wd>

<space/>

<wd l="9960" t="13166" r="10483" b="13368">possi-</wd>

</ln>

<ln l="6115" t="13421" r="10483" b="13622" baseLine="13570" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6115" t="13421" r="6720" b="13579">bilities</wd>

<space/>

<wd l="6850" t="13421" r="7166" b="13579">and</wd>

<space/>

<wd l="7282" t="13469" r="7733" b="13579">more</wd>

<space/>

<wd l="7862" t="13421" r="9110" b="13622">computational</wd>

<space/>

<wd l="9240" t="13440" r="9634" b="13579">cost.</wd>

<space/>

<wd l="9768" t="13421" r="10483" b="13579">Further-</wd>

</ln>

<ln l="6120" t="13670" r="10483" b="13872" baseLine="13824" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6120" t="13718" r="6619" b="13862">more,</wd>

<space/>

<wd l="6686" t="13670" r="6955" b="13829">the</wd>

<space/>

<wd l="7018" t="13670" r="7877" b="13872">generated</wd>

<space/>

<wd l="7934" t="13670" r="8856" b="13829">candidates</wd>

<space/>

<wd l="8918" t="13718" r="9182" b="13829">are</wd>

<space/>

<wd l="9245" t="13670" r="10046" b="13872">lexically,</wd>

<space/>

<wd l="10118" t="13718" r="10483" b="13872">syn-</wd>

</ln>

<ln l="6120" t="13925" r="10488" b="14126" baseLine="14078" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6120" t="13925" r="6974" b="14126">tactically,</wd>

<space/>

<wd l="7109" t="13925" r="7430" b="14083">and</wd>

<space/>

<wd l="7555" t="13925" r="8654" b="14126">semantically</wd>

<space/>

<wd l="8774" t="13973" r="9168" b="14126">very</wd>

<space/>

<wd l="9293" t="13925" r="9667" b="14083">akin</wd>

<space/>

<wd l="9792" t="13944" r="9960" b="14083">to</wd>

<space/>

<wd l="10090" t="13925" r="10488" b="14083">each</wd>

<space/>

</ln>

<ln l="6125" t="14179" r="10483" b="14381" baseLine="14328">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="6125" t="14179" r="6619" b="14338">other.</wd>

<space/>

<wd l="6720" t="14184" r="7032" b="14338">For</wd>

<space/>

<wd l="7123" t="14179" r="7915" b="14381">example,</wd>

<space/>

<wd l="8016" t="14179" r="8270" b="14338">for</wd>

<space/>

<wd l="8362" t="14179" r="8626" b="14338">the</wd>

<space/>

<wd l="8726" t="14179" r="9197" b="14338">OOV</wd>

<space/>

<wd l="9288" t="14179" r="9749" b="14338">word</wd>

<space/>

</run>

<wd l="9840" t="14179" r="10483" b="14381"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">“</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">yoor</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">”,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

</run>

</ln>

<ln l="6125" t="14434" r="10512" b="14635" baseLine="14582">

<wd l="6125" t="14434" r="6624" b="14592"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">“</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">our</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">”</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="6734" t="14434" r="7248" b="14635">might</wd>

<space/>

<wd l="7354" t="14434" r="7565" b="14592">be</wd>

<space/>

<wd l="7675" t="14434" r="8640" b="14635">mistakenly</wd>

<space/>

<wd l="8760" t="14434" r="9470" b="14592">selected</wd>

<space/>

<wd l="9581" t="14434" r="10200" b="14592">instead</wd>

<space/>

<wd l="10310" t="14434" r="10512" b="14592">of</wd>

<space/>

</run>

</ln>

<ln l="6125" t="14683" r="10483" b="14885" baseLine="14837">

<wd l="6125" t="14683" r="6763" b="14885"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">“</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">your</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">”.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="6859" t="14683" r="7368" b="14842">There</wd>

<space/>

<wd l="7454" t="14731" r="7718" b="14842">are</wd>

<space/>

<wd l="7805" t="14731" r="7901" b="14842">a</wd>

<space/>

<wd l="7987" t="14683" r="8626" b="14842">smaller</wd>

<space/>

<wd l="8702" t="14683" r="9374" b="14842">number</wd>

<space/>

<wd l="9456" t="14683" r="9658" b="14842">of</wd>

<space/>

<wd l="9710" t="14683" r="10483" b="14885">potential</wd>

<space/>

</run>

</ln>

<ln l="6125" t="14938" r="10488" b="15139" baseLine="15086" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="14938" r="7046" b="15096">candidates</wd>

<space/>

<wd l="7123" t="14938" r="7378" b="15096">for</wd>

<space/>

<wd l="7445" t="14938" r="8102" b="15139">lengthy</wd>

<space/>

<wd l="8170" t="14938" r="8640" b="15096">OOV</wd>

<space/>

<wd l="8707" t="14938" r="9288" b="15096">words.</wd>

<space/>

<wd l="9365" t="14938" r="9605" b="15096">As</wd>

<space/>

<wd l="9686" t="14938" r="10253" b="15096">shown</wd>

<space/>

<wd l="10320" t="14938" r="10488" b="15091">in</wd>

<space/>

</ln>

<ln l="6125" t="15192" r="10488" b="15384" baseLine="15341" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="15192" r="6624" b="15350">Table</wd>

<space/>

<wd l="6715" t="15197" r="6864" b="15384">5,</wd>

<space/>

<wd l="6950" t="15192" r="7219" b="15350">the</wd>

<space/>

<wd l="7301" t="15192" r="7973" b="15350">number</wd>

<space/>

<wd l="8059" t="15192" r="8256" b="15350">of</wd>

<space/>

<wd l="8318" t="15192" r="9240" b="15350">candidates</wd>

<space/>

<wd l="9331" t="15192" r="9586" b="15350">for</wd>

<space/>

<wd l="9662" t="15192" r="9931" b="15350">the</wd>

<space/>

<wd l="10018" t="15192" r="10488" b="15350">OOV</wd>

</ln>

</para>

</column>

</section>

<dd l="5771" t="15736" r="6181" b="15977">

<para l="5804" t="15787" r="6148" b="15941" alignment="left" spaceBefore="4" lsp="exactly" lspExact="229" language="en">

<ln l="5870" t="15787" r="6082" b="15941" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="26">

<wd l="5870" t="15792" r="6082" b="15941">24</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4303.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1291" marginTop="1417" marginRight="1392" marginBottom="1302" offsetX="-6" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1291" t="1417" r="10517" b="15317">

<column l="1291" t="1417" r="5894" b="15317">

<para l="1416" t="1464" r="5789" b="3437" alignment="justified" li="72" ri="72" spaceBefore="1" spaceAfter="244" lsp="exactly" lspExact="253" language="en">

<ln l="1416" t="1464" r="5779" b="1666" baseLine="1613">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1416" t="1464" r="1877" b="1622">word</wd>

<space/>

</run>

<wd l="1939" t="1464" r="3120" b="1666"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">“</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">acessibility</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">”</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="3192" t="1464" r="3331" b="1622">is</wd>

<space/>

<wd l="3403" t="1464" r="3792" b="1666">only</wd>

<space/>

<wd l="3878" t="1464" r="4123" b="1656">14,</wd>

<space/>

<wd l="4195" t="1464" r="4733" b="1622">which</wd>

<space/>

<wd l="4800" t="1464" r="4939" b="1622">is</wd>

<space/>

<wd l="5016" t="1464" r="5338" b="1622">less</wd>

<space/>

<wd l="5405" t="1464" r="5779" b="1622">than</wd>

<space/>

</run>

</ln>

<ln l="1421" t="1714" r="5779" b="1915" baseLine="1867" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="1762" r="2150" b="1915">average,</wd>

<space/>

<wd l="2222" t="1714" r="2885" b="1915">thereby</wd>

<space/>

<wd l="2947" t="1714" r="3605" b="1915">making</wd>

<space/>

<wd l="3677" t="1714" r="4517" b="1872">candidate</wd>

<space/>

<wd l="4594" t="1714" r="5366" b="1872">selection</wd>

<space/>

<wd l="5438" t="1762" r="5779" b="1872">eas-</wd>

</ln>

<ln l="1421" t="1968" r="5779" b="2160" baseLine="2122" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="1968" r="1694" b="2126">ier.</wd>

<space/>

<wd l="1800" t="1973" r="2717" b="2160">Moreover,</wd>

<space/>

<wd l="2813" t="1968" r="3254" b="2126">there</wd>

<space/>

<wd l="3350" t="1968" r="3490" b="2126">is</wd>

<space/>

<wd l="3590" t="2016" r="3686" b="2126">a</wd>

<space/>

<wd l="3782" t="1968" r="4426" b="2126">distinct</wd>

<space/>

<wd l="4526" t="1968" r="5410" b="2126">difference</wd>

<space/>

<wd l="5501" t="1968" r="5779" b="2126">be-</wd>

</ln>

<ln l="1416" t="2222" r="5779" b="2424" baseLine="2371" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="2242" r="1944" b="2381">tween</wd>

<space/>

<wd l="2040" t="2222" r="2309" b="2381">the</wd>

<space/>

<wd l="2405" t="2222" r="3240" b="2424">meanings</wd>

<space/>

<wd l="3346" t="2222" r="3547" b="2381">of</wd>

<space/>

<wd l="3629" t="2222" r="4598" b="2414">candidates,</wd>

<space/>

<wd l="4699" t="2222" r="5237" b="2381">which</wd>

<space/>

<wd l="5338" t="2222" r="5477" b="2381">is</wd>

<space/>

<wd l="5582" t="2270" r="5779" b="2381">an</wd>

<space/>

</ln>

<ln l="1421" t="2472" r="5784" b="2674" baseLine="2626" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="2520" r="1810" b="2674">easy</wd>

<space/>

<wd l="1915" t="2472" r="2664" b="2630">situation</wd>

<space/>

<wd l="2765" t="2472" r="3019" b="2630">for</wd>

<space/>

<wd l="3115" t="2520" r="3408" b="2630">our</wd>

<space/>

<wd l="3504" t="2472" r="4728" b="2630">context-based</wd>

<space/>

<wd l="4814" t="2472" r="5784" b="2674">probability</wd>

<space/>

</ln>

<ln l="1421" t="2726" r="5789" b="2928" baseLine="2880" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="2726" r="2232" b="2885">functions</wd>

<space/>

<wd l="2309" t="2746" r="2477" b="2885">to</wd>

<space/>

<wd l="2558" t="2726" r="3053" b="2885">select</wd>

<space/>

<wd l="3125" t="2726" r="3389" b="2885">the</wd>

<space/>

<wd l="3466" t="2746" r="4075" b="2885">correct</wd>

<space/>

<wd l="4147" t="2774" r="4507" b="2885">one.</wd>

<space/>

<wd l="4589" t="2726" r="5419" b="2928">Although</wd>

<space/>

<wd l="5496" t="2774" r="5789" b="2885">our</wd>

<space/>

</ln>

<ln l="1421" t="2981" r="5779" b="3182" baseLine="3130" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="2981" r="2227" b="3182">approach</wd>

<space/>

<wd l="2318" t="2981" r="3072" b="3139">obtained</wd>

<space/>

<wd l="3149" t="2981" r="4046" b="3182">promising</wd>

<space/>

<wd l="4128" t="2981" r="4704" b="3139">results</wd>

<space/>

<wd l="4800" t="3029" r="5011" b="3139">on</wd>

<space/>

<wd l="5098" t="2981" r="5414" b="3139">this</wd>

<space/>

<wd l="5506" t="2981" r="5779" b="3139">da-</wd>

</ln>

<ln l="1416" t="3235" r="4560" b="3437" baseLine="3384" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="3254" r="1872" b="3427">taset,</wd>

<space/>

<wd l="1934" t="3235" r="2054" b="3394">it</wd>

<space/>

<wd l="2112" t="3235" r="2640" b="3394">works</wd>

<space/>

<wd l="2698" t="3235" r="3206" b="3394">better</wd>

<space/>

<wd l="3264" t="3283" r="3480" b="3394">on</wd>

<space/>

<wd l="3538" t="3235" r="3922" b="3437">long</wd>

<space/>

<wd l="3974" t="3235" r="4560" b="3394">words.</wd>

</ln>

</para>

<rulerline l="1291" t="3706" r="5894" b="3706" type="single" width="34" color="000000"/>

<para l="1426" t="3768" r="5371" b="3970" alignment="left" li="72" ri="72" spaceBefore="42" lsp="exactly" lspExact="249" language="en">

<tabs position="1426"/>

<ln l="1426" t="3768" r="5371" b="3970" baseLine="3922" italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="3768" r="1896" b="3926">OOV</wd>

<space/>

<wd l="1925" t="3768" r="2386" b="3926">word</wd>

<space/>

<wd l="2635" t="3768" r="3538" b="3926">Candidate</wd>

<space/>

<wd l="3590" t="3797" r="3845" b="3926">set</wd>

<tab position="3845"/>

<wd l="4776" t="3773" r="5078" b="3960">No.</wd>

<space/>

<wd l="5160" t="3768" r="5371" b="3970">of</wd>

</ln>

</para>

<para l="4790" t="4022" r="5736" b="4181" alignment="right" li="72" ri="72" lsp="exactly" lspExact="240" language="en">

<ln l="4790" t="4022" r="5736" b="4181" baseLine="4171" italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="4790" t="4022" r="5736" b="4181">candidates</wd>

</ln>

</para>

<rulerline l="1291" t="4238" r="5894" b="4238" type="single" width="19" color="000000"/>

<para l="1421" t="4291" r="5456" b="4493" alignment="left" li="72" ri="72" spaceBefore="15" lsp="exactly" lspExact="253" language="en">

<tabs position="1421"/>

<tabs alignment="right" position="4493" leaderChar=" "/>

<ln l="1421" t="4291" r="5390" b="4493" baseLine="4437" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<tab position="0"/>

<wd l="1421" t="4291" r="2410" b="4493">acessibility</wd>

<space/>

<wd l="2626" t="4291" r="3758" b="4493">accessibility,</wd>

<space/>

<wd l="3878" t="4291" r="4565" b="4450">accessi-</wd>

<tab position="4565"/>

<wd l="5198" t="4291" r="5390" b="4445">14</wd>

</ln>

</para>

<para l="2616" t="4541" r="4565" b="6264" alignment="justified" li="1296" ri="1296" spaceBefore="3" lsp="exactly" lspExact="253" language="en">

<ln l="2616" t="4541" r="4565" b="4742" baseLine="4694" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="2616" t="4541" r="2952" b="4742">bly,</wd>

<space/>

<wd l="3163" t="4541" r="3902" b="4742">basicity,</wd>

<space/>

<wd l="4114" t="4541" r="4565" b="4742">bicy-</wd>

</ln>

<ln l="2626" t="4795" r="4565" b="4997" baseLine="4949" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="2626" t="4795" r="3125" b="4987">clists,</wd>

<space/>

<wd l="3240" t="4795" r="4037" b="4997">bicyclist,</wd>

<space/>

<wd l="4162" t="4795" r="4565" b="4954">itali-</wd>

</ln>

<ln l="2626" t="5050" r="4565" b="5251" baseLine="5198" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="2626" t="5050" r="3110" b="5242">cizes,</wd>

<space/>

<wd l="3250" t="5050" r="3989" b="5242">abilities,</wd>

<space/>

<wd l="4114" t="5050" r="4565" b="5251">bicy-</wd>

</ln>

<ln l="2626" t="5304" r="4565" b="5496" baseLine="5453" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="2626" t="5304" r="3038" b="5496">clist,</wd>

<space/>

<wd l="3187" t="5304" r="3946" b="5496">sibilates,</wd>

<space/>

<wd l="4094" t="5304" r="4565" b="5462">stabi-</wd>

</ln>

<ln l="2626" t="5554" r="4565" b="5746" baseLine="5707" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="2626" t="5554" r="2990" b="5746">lize,</wd>

<space/>

<wd l="3110" t="5554" r="3859" b="5746">silicates,</wd>

<space/>

<wd l="3979" t="5554" r="4565" b="5712">celiba-</wd>

</ln>

<ln l="2626" t="5808" r="4565" b="6010" baseLine="5962" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="2626" t="5856" r="2875" b="6010">cy,</wd>

<space/>

<wd l="2995" t="5808" r="3773" b="6010">bicycles,</wd>

<space/>

<wd l="3898" t="5808" r="4219" b="5966">and</wd>

<space/>

<wd l="4325" t="5808" r="4565" b="5966">bi-</wd>

</ln>

<ln l="2626" t="6062" r="3130" b="6264" baseLine="6211" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="2626" t="6062" r="3130" b="6264">cycle.</wd>

</ln>

</para>

<para l="1709" t="6312" r="5452" b="7277" alignment="left" li="1296" ri="432" spaceBefore="1" spaceAfter="4" fli="-864" lsp="exactly" lspExact="253" language="en">

<ln l="1709" t="6312" r="5386" b="6514" baseLine="6463" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="31" forcedEOF="true">

<wd l="1709" t="6360" r="2117" b="6514">yoor</wd>

<tab position="0"/>

<tab position="2117"/>

<wd l="2621" t="6360" r="3072" b="6514">your,</wd>

<space/>

<wd l="3178" t="6360" r="3557" b="6514">you,</wd>

<space/>

<wd l="3667" t="6312" r="4114" b="6504">door,</wd>

<space/>

<wd l="4224" t="6360" r="4565" b="6504">our,</wd>

<tab position="4565"/>

<wd l="5184" t="6312" r="5386" b="6470">59
</wd>

</ln>

<ln l="2626" t="6566" r="4560" b="6768" baseLine="6720" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="31" forcedEOF="true">

<wd l="2626" t="6614" r="2856" b="6758">or,</wd>

<space/>

<wd l="2986" t="6614" r="3461" b="6768">yoga,</wd>

<space/>

<wd l="3590" t="6566" r="3955" b="6768">yak,</wd>

<space/>

<wd l="4085" t="6566" r="4560" b="6768">yuck,</wd>

<space/>

</ln>

<ln l="2621" t="6821" r="4565" b="7022" baseLine="6970" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="31" forcedEOF="true">

<wd l="2621" t="6821" r="3048" b="7022">yule,</wd>

<space/>

<wd l="3331" t="6869" r="3878" b="7013">moon,</wd>

<space/>

<wd l="4157" t="6840" r="4565" b="7013">tour,</wd>

<space/>

</ln>

<ln l="2616" t="7080" r="3336" b="7277" baseLine="7224" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="31" forcedEOF="true">

<wd l="2616" t="7123" r="3077" b="7277">poor,</wd>

<space/>

<wd l="3163" t="7205" r="3336" b="7234">...</wd>

</ln>

</para>

<rulerline l="1291" t="7306" r="5894" b="7306" type="single" width="34" color="000000"/>

<para l="1416" t="7354" r="5784" b="7766" alignment="justified" li="72" ri="72" spaceBefore="26" lsp="exactly" lspExact="253" language="en">

<ln l="1421" t="7354" r="5784" b="7555" baseLine="7507" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="7354" r="1920" b="7512">Table</wd>

<space/>

<wd l="2045" t="7358" r="2189" b="7512">5:</wd>

<space/>

<wd l="2328" t="7354" r="3106" b="7555">Example</wd>

<space/>

<wd l="3230" t="7354" r="3432" b="7512">of</wd>

<space/>

<wd l="3533" t="7354" r="4368" b="7512">candidate</wd>

<space/>

<wd l="4498" t="7373" r="4814" b="7512">sets</wd>

<space/>

<wd l="4939" t="7354" r="5194" b="7512">for</wd>

<space/>

<wd l="5314" t="7354" r="5784" b="7512">OOV</wd>

<space/>

</ln>

<ln l="1416" t="7608" r="2002" b="7766" baseLine="7762" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="7608" r="2002" b="7766">words.</wd>

</ln>

</para>

<para l="1411" t="8117" r="5808" b="14126" alignment="justified" li="72" ri="72" spaceBefore="283" lsp="exactly" lspExact="253" language="en">

<ln l="1421" t="8117" r="5789" b="8318" baseLine="8266" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="8117" r="1762" b="8275">Our</wd>

<space/>

<wd l="1910" t="8117" r="2717" b="8318">approach</wd>

<space/>

<wd l="2866" t="8117" r="3187" b="8275">and</wd>

<space/>

<wd l="3336" t="8117" r="4181" b="8275">SMT-like</wd>

<space/>

<wd l="4339" t="8136" r="4944" b="8318">system</wd>

<space/>

<wd l="5088" t="8117" r="5789" b="8275">attained</wd>

<space/>

</ln>

<ln l="1416" t="8366" r="5770" b="8568" baseLine="8520" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="8371" r="1992" b="8525">BLEU</wd>

<space/>

<wd l="2098" t="8414" r="2635" b="8525">scores</wd>

<space/>

<wd l="2741" t="8366" r="2942" b="8525">of</wd>

<space/>

<wd l="3024" t="8366" r="3504" b="8525">83.12</wd>

<space/>

<wd l="3610" t="8366" r="3926" b="8525">and</wd>

<space/>

<wd l="4022" t="8366" r="4565" b="8558">78.81,</wd>

<space/>

<wd l="4666" t="8366" r="5770" b="8568">respectively.</wd>

<space/>

</ln>

<ln l="1421" t="8621" r="5779" b="8822" baseLine="8770" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="8621" r="1805" b="8779">This</wd>

<space/>

<wd l="1872" t="8621" r="2362" b="8779">result</wd>

<space/>

<wd l="2419" t="8669" r="3005" b="8822">proves</wd>

<space/>

<wd l="3067" t="8621" r="3398" b="8779">that</wd>

<space/>

<wd l="3466" t="8621" r="3619" b="8774">if</wd>

<space/>

<wd l="3658" t="8669" r="3912" b="8779">we</wd>

<space/>

<wd l="3979" t="8621" r="4747" b="8822">integrate</wd>

<space/>

<wd l="4810" t="8621" r="5246" b="8779">three</wd>

<space/>

<wd l="5304" t="8621" r="5779" b="8822">prob-</wd>

</ln>

<ln l="1421" t="8875" r="5779" b="9077" baseLine="9024" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="8875" r="1982" b="9077">ability</wd>

<space/>

<wd l="2069" t="8923" r="2611" b="9034">scores</wd>

<space/>

<wd l="2698" t="8875" r="2966" b="9034">via</wd>

<space/>

<wd l="3048" t="8880" r="3768" b="9067">Maxent,</wd>

<space/>

<wd l="3850" t="8875" r="4752" b="9077">promising</wd>

<space/>

<wd l="4829" t="8875" r="5779" b="9034">normaliza-</wd>

</ln>

<ln l="1416" t="9125" r="5779" b="9326" baseLine="9278" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="9125" r="1762" b="9283">tion</wd>

<space/>

<wd l="1858" t="9173" r="2635" b="9326">accuracy</wd>

<space/>

<wd l="2731" t="9173" r="3034" b="9283">can</wd>

<space/>

<wd l="3120" t="9125" r="3331" b="9283">be</wd>

<space/>

<wd l="3427" t="9125" r="4229" b="9283">obtained.</wd>

<space/>

<wd l="4334" t="9125" r="4718" b="9283">This</wd>

<space/>

<wd l="4814" t="9125" r="5304" b="9283">result</wd>

<space/>

<wd l="5400" t="9173" r="5779" b="9283">con-</wd>

</ln>

<ln l="1421" t="9379" r="5789" b="9581" baseLine="9533" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="9379" r="1877" b="9538">firms</wd>

<space/>

<wd l="2006" t="9379" r="2338" b="9538">that</wd>

<space/>

<wd l="2467" t="9427" r="2563" b="9538">a</wd>

<space/>

<wd l="2688" t="9379" r="3907" b="9538">normalization</wd>

<space/>

<wd l="4042" t="9398" r="4646" b="9581">system</wd>

<space/>

<wd l="4771" t="9379" r="5789" b="9538">constructed</wd>

<space/>

</ln>

<ln l="1411" t="9634" r="5779" b="9835" baseLine="9782" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="9634" r="1925" b="9792">based</wd>

<space/>

<wd l="1997" t="9682" r="2213" b="9792">on</wd>

<space/>

<wd l="2280" t="9634" r="2549" b="9792">the</wd>

<space/>

<wd l="2621" t="9638" r="3293" b="9792">Maxent</wd>

<space/>

<wd l="3355" t="9634" r="4142" b="9835">principle</wd>

<space/>

<wd l="4219" t="9682" r="4512" b="9792">can</wd>

<space/>

<wd l="4598" t="9682" r="5232" b="9835">surpass</wd>

<space/>

<wd l="5318" t="9653" r="5779" b="9792">state-</wd>

</ln>

<ln l="1421" t="9888" r="5779" b="10090" baseLine="10037" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="9888" r="2246" b="10046">of-the-art</wd>

<space/>

<wd l="2347" t="9907" r="3077" b="10090">systems.</wd>

<space/>

<wd l="3178" t="9893" r="4037" b="10080">However,</wd>

<space/>

<wd l="4142" t="9888" r="4752" b="10046">several</wd>

<space/>

<wd l="4848" t="9888" r="5779" b="10046">drawbacks</wd>

<space/>

</ln>

<ln l="1421" t="10138" r="5784" b="10339" baseLine="10291" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="10138" r="1622" b="10296">of</wd>

<space/>

<wd l="1690" t="10186" r="1982" b="10296">our</wd>

<space/>

<wd l="2064" t="10138" r="2731" b="10296">method</wd>

<space/>

<wd l="2808" t="10186" r="3235" b="10296">were</wd>

<space/>

<wd l="3326" t="10138" r="4142" b="10296">disclosed</wd>

<space/>

<wd l="4219" t="10138" r="4445" b="10339">by</wd>

<space/>

<wd l="4531" t="10138" r="5434" b="10339">inspecting</wd>

<space/>

<wd l="5515" t="10138" r="5784" b="10296">the</wd>

<space/>

</ln>

<ln l="1421" t="10392" r="5779" b="10594" baseLine="10541" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="10411" r="1982" b="10594">output</wd>

<space/>

<wd l="2054" t="10392" r="2256" b="10550">of</wd>

<space/>

<wd l="2299" t="10392" r="2568" b="10550">the</wd>

<space/>

<wd l="2650" t="10411" r="3288" b="10594">system.</wd>

<space/>

<wd l="3374" t="10392" r="3715" b="10550">The</wd>

<space/>

<wd l="3782" t="10411" r="4210" b="10550">most</wd>

<space/>

<wd l="4277" t="10392" r="5179" b="10550">noticeable</wd>

<space/>

<wd l="5256" t="10440" r="5568" b="10550">one</wd>

<space/>

<wd l="5640" t="10392" r="5779" b="10550">is</wd>

<space/>

</ln>

<ln l="1416" t="10646" r="5784" b="10848" baseLine="10795" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="10646" r="1747" b="10805">that</wd>

<space/>

<wd l="1824" t="10646" r="2093" b="10805">the</wd>

<space/>

<wd l="2174" t="10646" r="2976" b="10848">approach</wd>

<space/>

<wd l="3053" t="10646" r="3427" b="10805">fails</wd>

<space/>

<wd l="3504" t="10646" r="3979" b="10805">when</wd>

<space/>

<wd l="4056" t="10646" r="4766" b="10848">tackling</wd>

<space/>

<wd l="4843" t="10694" r="5232" b="10848">very</wd>

<space/>

<wd l="5309" t="10646" r="5784" b="10848">noisy</wd>

<space/>

</ln>

<ln l="1416" t="10896" r="5770" b="11098" baseLine="11050" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="10915" r="1795" b="11088">text,</wd>

<space/>

<wd l="1858" t="10896" r="2189" b="11054">that</wd>

<space/>

<wd l="2251" t="10896" r="2438" b="11088">is,</wd>

<space/>

<wd l="2506" t="10896" r="3038" b="11098">ample</wd>

<space/>

<wd l="3096" t="10944" r="3595" b="11098">usage</wd>

<space/>

<wd l="3658" t="10896" r="3859" b="11054">of</wd>

<space/>

<wd l="3893" t="10896" r="4363" b="11054">OOV</wd>

<space/>

<wd l="4421" t="10896" r="4958" b="11054">words</wd>

<space/>

<wd l="5021" t="10896" r="5189" b="11050">in</wd>

<space/>

<wd l="5251" t="10944" r="5347" b="11054">a</wd>

<space/>

<wd l="5400" t="10915" r="5770" b="11054">text.</wd>

<space/>

</ln>

<ln l="1416" t="11150" r="5808" b="11352" baseLine="11304" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="11155" r="1723" b="11309">We</wd>

<space/>

<wd l="1819" t="11150" r="2419" b="11309">altered</wd>

<space/>

<wd l="2506" t="11198" r="2798" b="11309">our</wd>

<space/>

<wd l="2890" t="11150" r="3494" b="11309">dataset</wd>

<space/>

<wd l="3581" t="11170" r="3749" b="11309">to</wd>

<space/>

<wd l="3840" t="11150" r="4253" b="11309">have</wd>

<space/>

<wd l="4344" t="11150" r="4910" b="11352">higher</wd>

<space/>

<wd l="5002" t="11150" r="5506" b="11309">levels</wd>

<space/>

<wd l="5606" t="11150" r="5808" b="11309">of</wd>

<space/>

</ln>

<ln l="1416" t="11405" r="5774" b="11606" baseLine="11554" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="11405" r="1882" b="11563">noise</wd>

<space/>

<wd l="1987" t="11405" r="2462" b="11606">using</wd>

<space/>

<wd l="2568" t="11453" r="2774" b="11563">an</wd>

<space/>

<wd l="2880" t="11405" r="3686" b="11606">approach</wd>

<space/>

<wd l="3792" t="11405" r="4733" b="11563">introduced</wd>

<space/>

<wd l="4824" t="11405" r="5050" b="11606">by</wd>

<space/>

<wd l="5155" t="11405" r="5774" b="11597">Gadde,</wd>

<space/>

</ln>

<ln l="1421" t="11654" r="5784" b="11856" baseLine="11803" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="11654" r="2174" b="11846">Goutam,</wd>

<space/>

<wd l="2328" t="11654" r="2808" b="11846">Shah,</wd>

<space/>

<wd l="2957" t="11659" r="3955" b="11856">Bayyarapu,</wd>

<space/>

<wd l="4109" t="11654" r="4426" b="11813">and</wd>

<space/>

<wd l="4570" t="11654" r="5784" b="11813">Subramaniam</wd>

<space/>

</ln>

<ln l="1421" t="11909" r="5770" b="12110" baseLine="12058" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="11909" r="2054" b="12110">(2011),</wd>

<space/>

<wd l="2136" t="11909" r="2674" b="12067">which</wd>

<space/>

<wd l="2750" t="11909" r="3662" b="12110">artificially</wd>

<space/>

<wd l="3739" t="11928" r="4560" b="12110">generates</wd>

<space/>

<wd l="4646" t="11909" r="5117" b="12067">OOV</wd>

<space/>

<wd l="5189" t="11909" r="5770" b="12067">words.</wd>

<space/>

</ln>

<ln l="1421" t="12163" r="5779" b="12365" baseLine="12312" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="12163" r="1584" b="12317">If</wd>

<space/>

<wd l="1661" t="12163" r="1930" b="12322">the</wd>

<space/>

<wd l="2030" t="12182" r="2986" b="12365">percentage</wd>

<space/>

<wd l="3096" t="12163" r="3293" b="12322">of</wd>

<space/>

<wd l="3379" t="12163" r="3850" b="12322">OOV</wd>

<space/>

<wd l="3950" t="12163" r="4483" b="12322">words</wd>

<space/>

<wd l="4594" t="12211" r="5222" b="12322">crosses</wd>

<space/>

<wd l="5328" t="12163" r="5779" b="12355">45%,</wd>

<space/>

</ln>

<ln l="1416" t="12418" r="5789" b="12619" baseLine="12566" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="12418" r="1685" b="12576">the</wd>

<space/>

<wd l="1757" t="12466" r="2534" b="12619">accuracy</wd>

<space/>

<wd l="2606" t="12418" r="2808" b="12576">of</wd>

<space/>

<wd l="2851" t="12418" r="3120" b="12576">the</wd>

<space/>

<wd l="3187" t="12418" r="3850" b="12576">method</wd>

<space/>

<wd l="3917" t="12418" r="4829" b="12619">drastically</wd>

<space/>

<wd l="4901" t="12418" r="5381" b="12619">drops</wd>

<space/>

<wd l="5448" t="12437" r="5616" b="12576">to</wd>

<space/>

<wd l="5693" t="12466" r="5789" b="12576">a</wd>

<space/>

</ln>

<ln l="1416" t="12667" r="5779" b="12826" baseLine="12816" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="12672" r="1992" b="12826">BLEU</wd>

<space/>

<wd l="2078" t="12715" r="2534" b="12826">score</wd>

<space/>

<wd l="2621" t="12667" r="2818" b="12826">of</wd>

<space/>

<wd l="2880" t="12667" r="3202" b="12826">less</wd>

<space/>

<wd l="3283" t="12667" r="3658" b="12826">than</wd>

<space/>

<wd l="3749" t="12667" r="4003" b="12826">65.</wd>

<space/>

<wd l="4094" t="12667" r="4819" b="12826">Another</wd>

<space/>

<wd l="4901" t="12667" r="5779" b="12826">shortcom-</wd>

</ln>

<ln l="1421" t="12922" r="5779" b="13123" baseLine="13070" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="12922" r="1699" b="13123">ing</wd>

<space/>

<wd l="1790" t="12922" r="1992" b="13080">of</wd>

<space/>

<wd l="2059" t="12970" r="2352" b="13080">our</wd>

<space/>

<wd l="2443" t="12922" r="3245" b="13123">approach</wd>

<space/>

<wd l="3336" t="12922" r="3475" b="13080">is</wd>

<space/>

<wd l="3562" t="12922" r="3893" b="13080">that</wd>

<space/>

<wd l="3984" t="12922" r="4104" b="13080">it</wd>

<space/>

<wd l="4195" t="12922" r="4334" b="13080">is</wd>

<space/>

<wd l="4426" t="12941" r="4704" b="13080">not</wd>

<space/>

<wd l="4795" t="12922" r="5155" b="13080">able</wd>

<space/>

<wd l="5246" t="12941" r="5414" b="13080">to</wd>

<space/>

<wd l="5510" t="12922" r="5779" b="13080">ad-</wd>

</ln>

<ln l="1421" t="13176" r="5770" b="13378" baseLine="13325" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="13176" r="1867" b="13334">dress</wd>

<space/>

<wd l="1997" t="13176" r="2866" b="13334">combined</wd>

<space/>

<wd l="2986" t="13176" r="3518" b="13334">words</wd>

<space/>

<wd l="3648" t="13176" r="3965" b="13334">and</wd>

<space/>

<wd l="4090" t="13176" r="5266" b="13334">abbreviations</wd>

<space/>

<wd l="5400" t="13176" r="5770" b="13378">(e.g.</wd>

<space/>

</ln>

<ln l="1421" t="13411" r="5779" b="13646" baseLine="13594">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1421" t="13445" r="1766" b="13603">alot</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="modern" fontPitch="fixed" spacing="0"><wd l="1838" t="13478" r="2045" b="13560">—</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2131" t="13498" r="2232" b="13603">a</wd>

<space/>

</run>

<wd l="2309" t="13445" r="2587" b="13637"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">lot</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2669" t="13445" r="2986" b="13603">btw</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="modern" fontPitch="fixed" spacing="0"><wd l="3058" t="13478" r="3264" b="13560">—</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3350" t="13445" r="3557" b="13646">by</wd>

<space/>

<wd l="3629" t="13445" r="3893" b="13603">the</wd>

<space/>

</run>

<wd l="3965" t="13445" r="4387" b="13646"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">way</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">)</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="4464" t="13445" r="5160" b="13603">because</wd>

<space/>

<wd l="5237" t="13445" r="5779" b="13603">candi-</wd>

</run>

</ln>

<ln l="1421" t="13714" r="5779" b="13915" baseLine="13862" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="13714" r="1786" b="13872">date</wd>

<space/>

<wd l="1862" t="13714" r="2789" b="13915">generation</wd>

<space/>

<wd l="2861" t="13714" r="3518" b="13872">module</wd>

<space/>

<wd l="3600" t="13714" r="4099" b="13872">forms</wd>

<space/>

<wd l="4186" t="13714" r="4574" b="13915">only</wd>

<space/>

<wd l="4656" t="13714" r="5170" b="13915">single</wd>

<space/>

<wd l="5246" t="13714" r="5779" b="13872">words</wd>

<space/>

</ln>

<ln l="1421" t="13968" r="2707" b="14126" baseLine="14117" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="13968" r="1680" b="14126">for</wd>

<space/>

<wd l="1738" t="13968" r="2136" b="14126">each</wd>

<space/>

<wd l="2194" t="13968" r="2707" b="14126">OOV.</wd>

</ln>

</para>

<para l="1421" t="14424" r="2995" b="14597" alignment="left" li="72" ri="72" spaceBefore="206" lsp="exactly" lspExact="279" language="en">

<ln l="1421" t="14424" r="2995" b="14597" baseLine="14592" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="19">

<wd l="1421" t="14429" r="1531" b="14597">7</wd>

<space/>

<wd l="1858" t="14429" r="2995" b="14597">Conclusion</wd>

</ln>

</para>

<para l="1421" t="14856" r="5784" b="15307" alignment="justified" li="72" ri="72" spaceBefore="151" lsp="exactly" lspExact="248" language="en">

<ln l="1421" t="14856" r="5784" b="15058" baseLine="15005" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="14861" r="1598" b="15010">In</wd>

<space/>

<wd l="1666" t="14856" r="1982" b="15014">this</wd>

<space/>

<wd l="2054" t="14904" r="2597" b="15058">paper,</wd>

<space/>

<wd l="2674" t="14904" r="2928" b="15014">we</wd>

<space/>

<wd l="3000" t="14856" r="3408" b="15014">have</wd>

<space/>

<wd l="3475" t="14856" r="4334" b="15058">presented</wd>

<space/>

<wd l="4402" t="14904" r="4498" b="15014">a</wd>

<space/>

<wd l="4565" t="14856" r="5784" b="15014">normalization</wd>

<space/>

</ln>

<ln l="1421" t="15106" r="5770" b="15307" baseLine="15259" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="15106" r="2227" b="15307">approach</wd>

<space/>

<wd l="2290" t="15106" r="2798" b="15264">based</wd>

<space/>

<wd l="2866" t="15154" r="3082" b="15264">on</wd>

<space/>

<wd l="3144" t="15106" r="3413" b="15264">the</wd>

<space/>

<wd l="3480" t="15106" r="4373" b="15264">maximum</wd>

<space/>

<wd l="4440" t="15125" r="5112" b="15307">entropy</wd>

<space/>

<wd l="5179" t="15106" r="5770" b="15264">model.</wd>

</ln>

</para>

</column>

<column l="6115" t="1417" r="10517" b="13694">

<para l="6115" t="1464" r="10512" b="5669" alignment="justified" lsp="exactly" lspExact="252" language="en">

<ln l="6125" t="1464" r="10483" b="1666" baseLine="1613" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="1464" r="6509" b="1622">This</wd>

<space/>

<wd l="6610" t="1464" r="7411" b="1666">approach</wd>

<space/>

<wd l="7498" t="1464" r="8256" b="1666">provides</wd>

<space/>

<wd l="8357" t="1512" r="8453" b="1622">a</wd>

<space/>

<wd l="8539" t="1464" r="9168" b="1622">unified</wd>

<space/>

<wd l="9259" t="1464" r="9806" b="1666">layout</wd>

<space/>

<wd l="9898" t="1464" r="10157" b="1622">for</wd>

<space/>

<wd l="10248" t="1464" r="10483" b="1618">in-</wd>

</ln>

<ln l="6125" t="1714" r="10483" b="1915" baseLine="1867" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="1714" r="7138" b="1915">corporating</wd>

<space/>

<wd l="7229" t="1714" r="7982" b="1872">different</wd>

<space/>

<wd l="8078" t="1762" r="8722" b="1872">sources</wd>

<space/>

<wd l="8818" t="1714" r="9019" b="1872">of</wd>

<space/>

<wd l="9086" t="1714" r="9773" b="1872">features</wd>

<space/>

<wd l="9864" t="1733" r="10032" b="1872">to</wd>

<space/>

<wd l="10123" t="1762" r="10483" b="1872">nor-</wd>

</ln>

<ln l="6120" t="1968" r="10488" b="2170" baseLine="2122" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="1968" r="6706" b="2126">malize</wd>

<space/>

<wd l="6768" t="1968" r="7416" b="2126">Twitter</wd>

<space/>

<wd l="7474" t="2016" r="8347" b="2170">messages.</wd>

<space/>

<wd l="8419" t="1968" r="8760" b="2126">Our</wd>

<space/>

<wd l="8813" t="1968" r="9629" b="2170">proposed</wd>

<space/>

<wd l="9686" t="1968" r="10488" b="2170">approach</wd>

<space/>

</ln>

<ln l="6125" t="2222" r="10488" b="2424" baseLine="2371" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="2222" r="6816" b="2381">consists</wd>

<space/>

<wd l="6888" t="2222" r="7090" b="2381">of</wd>

<space/>

<wd l="7133" t="2222" r="7570" b="2381">three</wd>

<space/>

<wd l="7646" t="2242" r="8218" b="2424">stages:</wd>

<space/>

<wd l="8299" t="2222" r="9571" b="2424">preprocessing,</wd>

<space/>

<wd l="9648" t="2222" r="10488" b="2381">candidate</wd>

<space/>

</ln>

<ln l="6125" t="2472" r="10483" b="2674" baseLine="2626" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="2472" r="7099" b="2674">generation,</wd>

<space/>

<wd l="7262" t="2472" r="7584" b="2630">and</wd>

<space/>

<wd l="7733" t="2472" r="8568" b="2630">candidate</wd>

<space/>

<wd l="8731" t="2472" r="9547" b="2630">selection.</wd>

<space/>

<wd l="9715" t="2472" r="10051" b="2630">The</wd>

<space/>

<wd l="10210" t="2520" r="10483" b="2674">ap-</wd>

</ln>

<ln l="6115" t="2726" r="10488" b="2928" baseLine="2880" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6115" t="2726" r="6720" b="2928">proach</wd>

<space/>

<wd l="6787" t="2726" r="6922" b="2885">is</wd>

<space/>

<wd l="6989" t="2726" r="7541" b="2885">robust</wd>

<space/>

<wd l="7598" t="2746" r="7766" b="2885">to</wd>

<space/>

<wd l="7829" t="2726" r="8702" b="2885">normalize</wd>

<space/>

<wd l="8765" t="2774" r="9379" b="2885">unseen</wd>

<space/>

<wd l="9442" t="2726" r="9974" b="2885">words</wd>

<space/>

<wd l="10046" t="2726" r="10488" b="2885">since</wd>

<space/>

</ln>

<ln l="6125" t="2981" r="10488" b="3182" baseLine="3130" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="2981" r="6326" b="3139">its</wd>

<space/>

<wd l="6427" t="2981" r="7262" b="3139">candidate</wd>

<space/>

<wd l="7363" t="2981" r="8290" b="3182">generation</wd>

<space/>

<wd l="8390" t="3000" r="8827" b="3182">stage</wd>

<space/>

<wd l="8928" t="2981" r="9326" b="3139">does</wd>

<space/>

<wd l="9422" t="3000" r="9701" b="3139">not</wd>

<space/>

<wd l="9792" t="2981" r="10488" b="3182">practice</wd>

<space/>

</ln>

<ln l="6120" t="3235" r="10488" b="3437" baseLine="3384" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="3235" r="7666" b="3437">machine-learning</wd>

<space/>

<wd l="7752" t="3235" r="8539" b="3394">methods.</wd>

<space/>

<wd l="8645" t="3240" r="8818" b="3389">In</wd>

<space/>

<wd l="8909" t="3235" r="9178" b="3394">the</wd>

<space/>

<wd l="9264" t="3235" r="10488" b="3437">preprocessing</wd>

<space/>

</ln>

<ln l="6130" t="3485" r="10493" b="3686" baseLine="3638" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="3504" r="6624" b="3686">stage,</wd>

<space/>

<wd l="6710" t="3485" r="7114" b="3643">after</wd>

<space/>

<wd l="7186" t="3485" r="8006" b="3686">trimming</wd>

<space/>

<wd l="8083" t="3533" r="8947" b="3643">erroneous</wd>

<space/>

<wd l="9029" t="3485" r="10090" b="3686">whitespaces</wd>

<space/>

<wd l="10176" t="3485" r="10493" b="3643">and</wd>

<space/>

</ln>

<ln l="6120" t="3739" r="10488" b="3931" baseLine="3893" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="3739" r="7258" b="3931">tokenization,</wd>

<space/>

<wd l="7382" t="3739" r="7853" b="3898">OOV</wd>

<space/>

<wd l="7968" t="3739" r="8501" b="3898">words</wd>

<space/>

<wd l="8621" t="3787" r="8880" b="3898">are</wd>

<space/>

<wd l="9000" t="3739" r="9734" b="3898">detected</wd>

<space/>

<wd l="9840" t="3739" r="10114" b="3898">via</wd>

<space/>

<wd l="10219" t="3739" r="10488" b="3898">the</wd>

<space/>

</ln>

<ln l="6125" t="3994" r="10483" b="4195" baseLine="4142" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="3994" r="6595" b="4152">GNU</wd>

<space/>

<wd l="6696" t="3994" r="7272" b="4195">Aspell</wd>

<space/>

<wd l="7378" t="3994" r="8309" b="4195">dictionary.</wd>

<space/>

<wd l="8414" t="3994" r="9466" b="4152">Normalized</wd>

<space/>

<wd l="9566" t="3994" r="10483" b="4152">candidates</wd>

<space/>

</ln>

<ln l="6125" t="4243" r="10493" b="4445" baseLine="4397" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="4291" r="6389" b="4402">are</wd>

<space/>

<wd l="6470" t="4243" r="7330" b="4445">generated</wd>

<space/>

<wd l="7402" t="4243" r="7656" b="4402">for</wd>

<space/>

<wd l="7733" t="4243" r="8131" b="4402">each</wd>

<space/>

<wd l="8213" t="4243" r="8683" b="4402">OOV</wd>

<space/>

<wd l="8760" t="4243" r="9216" b="4402">word</wd>

<space/>

<wd l="9293" t="4243" r="9461" b="4397">in</wd>

<space/>

<wd l="9533" t="4243" r="9802" b="4402">the</wd>

<space/>

<wd l="9888" t="4243" r="10493" b="4402">second</wd>

<space/>

</ln>

<ln l="6130" t="4498" r="10483" b="4699" baseLine="4651" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="4517" r="6571" b="4699">stage</wd>

<space/>

<wd l="6672" t="4498" r="7517" b="4699">regarding</wd>

<space/>

<wd l="7613" t="4517" r="7781" b="4656">to</wd>

<space/>

<wd l="7886" t="4498" r="8520" b="4690">lexical,</wd>

<space/>

<wd l="8616" t="4498" r="9538" b="4699">phonemic,</wd>

<space/>

<wd l="9648" t="4498" r="9970" b="4656">and</wd>

<space/>

<wd l="10061" t="4546" r="10483" b="4656">mor-</wd>

</ln>

<ln l="6115" t="4752" r="10488" b="4954" baseLine="4901" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6115" t="4752" r="7315" b="4954">phophonemic</wd>

<space/>

<wd l="7430" t="4752" r="8443" b="4910">similarities.</wd>

<space/>

<wd l="8563" t="4752" r="9043" b="4910">Since</wd>

<space/>

<wd l="9149" t="4752" r="10488" b="4954">code-switching</wd>

<space/>

</ln>

<ln l="6115" t="5006" r="10488" b="5208" baseLine="5155" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6115" t="5006" r="6854" b="5165">between</wd>

<space/>

<wd l="6941" t="5006" r="7507" b="5208">Malay</wd>

<space/>

<wd l="7594" t="5006" r="7915" b="5165">and</wd>

<space/>

<wd l="7992" t="5006" r="8664" b="5208">English</wd>

<space/>

<wd l="8755" t="5006" r="8894" b="5165">is</wd>

<space/>

<wd l="8986" t="5054" r="9379" b="5208">very</wd>

<space/>

<wd l="9466" t="5054" r="10229" b="5165">common</wd>

<space/>

<wd l="10320" t="5006" r="10488" b="5160">in</wd>

<space/>

</ln>

<ln l="6125" t="5256" r="10512" b="5458" baseLine="5410" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="5304" r="6418" b="5414">our</wd>

<space/>

<wd l="6518" t="5256" r="7171" b="5448">dataset,</wd>

<space/>

<wd l="7277" t="5256" r="7546" b="5414">the</wd>

<space/>

<wd l="7637" t="5256" r="8410" b="5458">potential</wd>

<space/>

<wd l="8506" t="5256" r="9182" b="5458">English</wd>

<space/>

<wd l="9278" t="5256" r="10210" b="5414">translation</wd>

<space/>

<wd l="10310" t="5256" r="10512" b="5414">of</wd>

<space/>

</ln>

<ln l="6125" t="5510" r="10176" b="5669" baseLine="5659" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="5510" r="6595" b="5669">OOV</wd>

<space/>

<wd l="6653" t="5510" r="7190" b="5669">words</wd>

<space/>

<wd l="7253" t="5510" r="7392" b="5669">is</wd>

<space/>

<wd l="7454" t="5510" r="7800" b="5669">also</wd>

<space/>

<wd l="7862" t="5510" r="8390" b="5669">added</wd>

<space/>

<wd l="8438" t="5530" r="8602" b="5669">to</wd>

<space/>

<wd l="8664" t="5510" r="8933" b="5669">the</wd>

<space/>

<wd l="8990" t="5510" r="9830" b="5669">candidate</wd>

<space/>

<wd l="9893" t="5530" r="10176" b="5669">set.</wd>

</ln>

</para>

<para l="6120" t="5765" r="10493" b="8746" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="6355" t="5765" r="10488" b="5966" baseLine="5914" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="5770" r="6528" b="5918">In</wd>

<space/>

<wd l="6590" t="5765" r="6859" b="5923">the</wd>

<space/>

<wd l="6926" t="5765" r="7349" b="5923">third</wd>

<space/>

<wd l="7411" t="5784" r="7901" b="5966">stage,</wd>

<space/>

<wd l="7973" t="5765" r="8410" b="5923">three</wd>

<space/>

<wd l="8477" t="5765" r="9456" b="5923">conditional</wd>

<space/>

<wd l="9518" t="5765" r="10488" b="5966">probability</wd>

<space/>

</ln>

<ln l="6130" t="6014" r="10483" b="6216" baseLine="6163" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="6062" r="6667" b="6173">scores</wd>

<space/>

<wd l="6778" t="6062" r="7037" b="6173">are</wd>

<space/>

<wd l="7142" t="6014" r="7901" b="6216">assigned</wd>

<space/>

<wd l="7992" t="6034" r="8160" b="6173">to</wd>

<space/>

<wd l="8266" t="6014" r="8664" b="6173">each</wd>

<space/>

<wd l="8770" t="6014" r="9648" b="6173">candidate:</wd>

<space/>

<wd l="9792" t="6014" r="9946" b="6216">1)</wd>

<space/>

<wd l="10046" t="6014" r="10483" b="6216">posi-</wd>

</ln>

<ln l="6120" t="6269" r="10483" b="6470" baseLine="6418" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="6269" r="6619" b="6427">tional</wd>

<space/>

<wd l="6696" t="6269" r="7464" b="6470">indexing</wd>

<space/>

<wd l="7531" t="6269" r="8357" b="6427">considers</wd>

<space/>

<wd l="8429" t="6269" r="8693" b="6427">the</wd>

<space/>

<wd l="8760" t="6269" r="9730" b="6470">probability</wd>

<space/>

<wd l="9802" t="6269" r="10003" b="6427">of</wd>

<space/>

<wd l="10042" t="6269" r="10483" b="6470">posi-</wd>

</ln>

<ln l="6120" t="6523" r="10483" b="6682" baseLine="6672" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="6523" r="6619" b="6682">tional</wd>

<space/>

<wd l="6715" t="6523" r="7502" b="6682">locations</wd>

<space/>

<wd l="7594" t="6523" r="7795" b="6682">of</wd>

<space/>

<wd l="7858" t="6542" r="8266" b="6682">term</wd>

<space/>

<wd l="8352" t="6571" r="9394" b="6682">occurrences</wd>

<space/>

<wd l="9490" t="6523" r="10008" b="6682">inside</wd>

<space/>

<wd l="10104" t="6523" r="10483" b="6682">doc-</wd>

</ln>

<ln l="6120" t="6773" r="10493" b="6974" baseLine="6922" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="6792" r="6806" b="6965">uments,</wd>

<space/>

<wd l="7147" t="6773" r="7320" b="6974">2)</wd>

<space/>

<wd l="7661" t="6773" r="9283" b="6974">dependency-based</wd>

<space/>

<wd l="9614" t="6773" r="10493" b="6974">frequency</wd>

<space/>

</ln>

<ln l="6120" t="7027" r="10488" b="7229" baseLine="7176" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="7075" r="6936" b="7186">measures</wd>

<space/>

<wd l="7061" t="7027" r="7330" b="7186">the</wd>

<space/>

<wd l="7445" t="7027" r="8419" b="7229">probability</wd>

<space/>

<wd l="8539" t="7027" r="8741" b="7186">of</wd>

<space/>

<wd l="8837" t="7027" r="9792" b="7229">prevalence</wd>

<space/>

<wd l="9922" t="7027" r="10118" b="7186">of</wd>

<space/>

<wd l="10219" t="7027" r="10488" b="7186">the</wd>

<space/>

</ln>

<ln l="6125" t="7282" r="10493" b="7483" baseLine="7430" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="7282" r="7176" b="7483">dependency</wd>

<space/>

<wd l="7248" t="7282" r="7920" b="7440">relation</wd>

<space/>

<wd l="8002" t="7282" r="8198" b="7440">of</wd>

<space/>

<wd l="8251" t="7282" r="8789" b="7440">words</wd>

<space/>

<wd l="8866" t="7301" r="9034" b="7440">to</wd>

<space/>

<wd l="9115" t="7282" r="9514" b="7440">each</wd>

<space/>

<wd l="9595" t="7282" r="10090" b="7474">other,</wd>

<space/>

<wd l="10176" t="7282" r="10493" b="7440">and</wd>

<space/>

</ln>

<ln l="6125" t="7536" r="10488" b="7738" baseLine="7685" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="7536" r="6298" b="7738">3)</wd>

<space/>

<wd l="6398" t="7536" r="6667" b="7694">the</wd>

<space/>

<wd l="6763" t="7536" r="7550" b="7738">language</wd>

<space/>

<wd l="7646" t="7536" r="8189" b="7694">model</wd>

<space/>

<wd l="8290" t="7536" r="9067" b="7694">indicates</wd>

<space/>

<wd l="9163" t="7536" r="9427" b="7694">the</wd>

<space/>

<wd l="9518" t="7536" r="10488" b="7738">probability</wd>

<space/>

</ln>

<ln l="6125" t="7786" r="10478" b="7987" baseLine="7934" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="7786" r="6326" b="7944">of</wd>

<space/>

<wd l="6374" t="7786" r="7387" b="7944">distribution</wd>

<space/>

<wd l="7454" t="7786" r="7656" b="7944">of</wd>

<space/>

<wd l="7699" t="7786" r="7968" b="7944">the</wd>

<space/>

<wd l="8040" t="7834" r="8837" b="7987">sequence</wd>

<space/>

<wd l="8909" t="7786" r="9110" b="7944">of</wd>

<space/>

<wd l="9154" t="7786" r="9734" b="7944">words.</wd>

<space/>

<wd l="9811" t="7786" r="10478" b="7987">Finally,</wd>

<space/>

</ln>

<ln l="6120" t="8040" r="10488" b="8242" baseLine="8189" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="8040" r="6389" b="8198">the</wd>

<space/>

<wd l="6456" t="8040" r="6811" b="8198">best</wd>

<space/>

<wd l="6883" t="8040" r="7718" b="8198">candidate</wd>

<space/>

<wd l="7795" t="8040" r="7930" b="8198">is</wd>

<space/>

<wd l="8011" t="8040" r="8755" b="8198">selected.</wd>

<space/>

<wd l="8832" t="8040" r="9749" b="8198">Maximum</wd>

<space/>

<wd l="9816" t="8059" r="10488" b="8242">entropy</wd>

<space/>

</ln>

<ln l="6125" t="8294" r="10483" b="8496" baseLine="8443" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="8294" r="6974" b="8496">integrates</wd>

<space/>

<wd l="7066" t="8294" r="7334" b="8453">the</wd>

<space/>

<wd l="7430" t="8294" r="8189" b="8453">obtained</wd>

<space/>

<wd l="8270" t="8294" r="9240" b="8496">probability</wd>

<space/>

<wd l="9341" t="8342" r="9878" b="8453">scores</wd>

<space/>

<wd l="9970" t="8314" r="10138" b="8453">to</wd>

<space/>

<wd l="10238" t="8342" r="10483" b="8453">es-</wd>

</ln>

<ln l="6120" t="8544" r="10430" b="8746" baseLine="8693" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="8544" r="6672" b="8702">timate</wd>

<space/>

<wd l="6725" t="8544" r="6994" b="8702">the</wd>

<space/>

<wd l="7051" t="8544" r="7771" b="8702">ultimate</wd>

<space/>

<wd l="7824" t="8544" r="8794" b="8746">probability</wd>

<space/>

<wd l="8851" t="8544" r="9053" b="8702">of</wd>

<space/>

<wd l="9091" t="8544" r="9490" b="8702">each</wd>

<space/>

<wd l="9547" t="8544" r="10430" b="8702">candidate.</wd>

</ln>

</para>

<para l="6120" t="8798" r="10493" b="11990" alignment="justified" spaceBefore="4" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="6355" t="8798" r="10483" b="9000" baseLine="8947" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="8798" r="6691" b="8957">The</wd>

<space/>

<wd l="6758" t="8798" r="7560" b="9000">approach</wd>

<space/>

<wd l="7622" t="8798" r="7762" b="8957">is</wd>

<space/>

<wd l="7829" t="8798" r="8683" b="8957">examined</wd>

<space/>

<wd l="8736" t="8798" r="9216" b="9000">using</wd>

<space/>

<wd l="9278" t="8798" r="9768" b="8990">7,000</wd>

<space/>

<wd l="9826" t="8798" r="10483" b="9000">parallel</wd>

<space/>

</ln>

<ln l="6125" t="9048" r="10483" b="9250" baseLine="9202" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="9048" r="6773" b="9206">Twitter</wd>

<space/>

<wd l="6845" t="9096" r="7723" b="9250">messages,</wd>

<space/>

<wd l="7800" t="9048" r="8338" b="9206">which</wd>

<space/>

<wd l="8414" t="9048" r="8554" b="9206">is</wd>

<space/>

<wd l="8635" t="9048" r="9010" b="9250">split</wd>

<space/>

<wd l="9082" t="9048" r="9418" b="9206">into</wd>

<space/>

<wd l="9504" t="9048" r="9984" b="9240">5,000</wd>

<space/>

<wd l="10061" t="9096" r="10483" b="9206">mes-</wd>

</ln>

<ln l="6130" t="9302" r="10483" b="9504" baseLine="9451" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="9350" r="6595" b="9504">sages</wd>

<space/>

<wd l="6686" t="9302" r="6941" b="9461">for</wd>

<space/>

<wd l="7018" t="9302" r="7704" b="9504">training</wd>

<space/>

<wd l="7790" t="9302" r="8107" b="9461">and</wd>

<space/>

<wd l="8189" t="9302" r="8674" b="9494">2,000</wd>

<space/>

<wd l="8765" t="9302" r="9019" b="9461">for</wd>

<space/>

<wd l="9096" t="9302" r="9725" b="9504">testing.</wd>

<space/>

<wd l="9826" t="9302" r="10162" b="9461">The</wd>

<space/>

<wd l="10243" t="9350" r="10483" b="9461">re-</wd>

</ln>

<ln l="6130" t="9557" r="10488" b="9758" baseLine="9706" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="9557" r="6442" b="9715">sult</wd>

<space/>

<wd l="6538" t="9557" r="6677" b="9715">is</wd>

<space/>

<wd l="6768" t="9557" r="7666" b="9758">promising</wd>

<space/>

<wd l="7757" t="9557" r="8515" b="9758">whereby</wd>

<space/>

<wd l="8606" t="9605" r="8861" b="9715">we</wd>

<space/>

<wd l="8962" t="9557" r="9629" b="9715">achieve</wd>

<space/>

<wd l="9730" t="9605" r="9826" b="9715">a</wd>

<space/>

<wd l="9912" t="9562" r="10488" b="9715">BLEU</wd>

<space/>

</ln>

<ln l="6130" t="9806" r="10488" b="10008" baseLine="9960" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="9854" r="6586" b="9965">score</wd>

<space/>

<wd l="6653" t="9806" r="6854" b="9965">of</wd>

<space/>

<wd l="6907" t="9806" r="7387" b="9965">83.12</wd>

<space/>

<wd l="7459" t="9806" r="8078" b="10008">against</wd>

<space/>

<wd l="8141" t="9806" r="8410" b="9965">the</wd>

<space/>

<wd l="8467" t="9806" r="9192" b="9965">baseline</wd>

<space/>

<wd l="9259" t="9811" r="9878" b="9998">BLEU,</wd>

<space/>

<wd l="9950" t="9806" r="10488" b="9965">which</wd>

<space/>

</ln>

<ln l="6130" t="10061" r="10488" b="10262" baseLine="10214" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="10109" r="6667" b="10219">scores</wd>

<space/>

<wd l="6768" t="10061" r="7306" b="10219">42.01.</wd>

<space/>

<wd l="7416" t="10066" r="7718" b="10219">We</wd>

<space/>

<wd l="7819" t="10061" r="8227" b="10219">have</wd>

<space/>

<wd l="8333" t="10061" r="9202" b="10262">compared</wd>

<space/>

<wd l="9298" t="10109" r="9590" b="10219">our</wd>

<space/>

<wd l="9686" t="10061" r="10488" b="10262">approach</wd>

<space/>

</ln>

<ln l="6120" t="10315" r="10483" b="10517" baseLine="10464" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="10315" r="6514" b="10474">with</wd>

<space/>

<wd l="6624" t="10363" r="6720" b="10474">a</wd>

<space/>

<wd l="6830" t="10315" r="7675" b="10474">SMT-like</wd>

<space/>

<wd l="7786" t="10315" r="8587" b="10517">approach</wd>

<space/>

<wd l="8693" t="10315" r="9173" b="10517">using</wd>

<space/>

<wd l="9274" t="10315" r="9542" b="10474">the</wd>

<space/>

<wd l="9662" t="10363" r="10099" b="10474">same</wd>

<space/>

<wd l="10210" t="10315" r="10483" b="10474">da-</wd>

</ln>

<ln l="6120" t="10570" r="10493" b="10771" baseLine="10718" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="10589" r="6571" b="10728">taset.</wd>

<space/>

<wd l="6677" t="10570" r="7013" b="10728">The</wd>

<space/>

<wd l="7109" t="10618" r="7886" b="10771">accuracy</wd>

<space/>

<wd l="7982" t="10570" r="8179" b="10728">of</wd>

<space/>

<wd l="8251" t="10570" r="8515" b="10728">the</wd>

<space/>

<wd l="8616" t="10570" r="9461" b="10728">SMT-like</wd>

<space/>

<wd l="9552" t="10618" r="9893" b="10728">was</wd>

<space/>

<wd l="9994" t="10570" r="10493" b="10728">lower</wd>

<space/>

</ln>

<ln l="6120" t="10819" r="10488" b="11021" baseLine="10973" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="10819" r="6504" b="10978">than</wd>

<space/>

<wd l="6566" t="10867" r="6859" b="10978">our</wd>

<space/>

<wd l="6922" t="10819" r="7723" b="11021">approach</wd>

<space/>

<wd l="7786" t="10819" r="8112" b="11021">(i.e.</wd>

<space/>

<wd l="8189" t="10819" r="8654" b="10978">78.81</wd>

<space/>

<wd l="8741" t="10824" r="9312" b="10978">BLEU</wd>

<space/>

<wd l="9384" t="10867" r="9840" b="10978">score</wd>

<space/>

<wd l="9907" t="10819" r="10162" b="10978">for</wd>

<space/>

<wd l="10219" t="10819" r="10488" b="10978">the</wd>

<space/>

</ln>

<ln l="6130" t="11074" r="10483" b="11275" baseLine="11222" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="11074" r="7094" b="11275">SMT-like).</wd>

<space/>

<wd l="7166" t="11078" r="7478" b="11232">For</wd>

<space/>

<wd l="7541" t="11074" r="8059" b="11232">future</wd>

<space/>

<wd l="8122" t="11074" r="8621" b="11266">work,</wd>

<space/>

<wd l="8688" t="11122" r="8942" b="11232">we</wd>

<space/>

<wd l="9010" t="11074" r="9346" b="11232">will</wd>

<space/>

<wd l="9418" t="11074" r="10157" b="11232">examine</wd>

<space/>

<wd l="10219" t="11074" r="10483" b="11232">the</wd>

<space/>

</ln>

<ln l="6120" t="11328" r="10483" b="11530" baseLine="11477" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="11333" r="6797" b="11486">Maxent</wd>

<space/>

<wd l="6864" t="11328" r="8088" b="11486">normalization</wd>

<space/>

<wd l="8160" t="11328" r="8962" b="11530">approach</wd>

<space/>

<wd l="9034" t="11328" r="9422" b="11486">with</wd>

<space/>

<wd l="9494" t="11376" r="9941" b="11486">more</wd>

<space/>

<wd l="10008" t="11328" r="10483" b="11530">prob-</wd>

</ln>

<ln l="6125" t="11578" r="10488" b="11779" baseLine="11731" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="11578" r="6686" b="11779">ability</wd>

<space/>

<wd l="6749" t="11578" r="7613" b="11770">functions,</wd>

<space/>

<wd l="7690" t="11578" r="8083" b="11736">such</wd>

<space/>

<wd l="8150" t="11626" r="8323" b="11736">as</wd>

<space/>

<wd l="8390" t="11578" r="9557" b="11736">distributional</wd>

<space/>

<wd l="9624" t="11578" r="10488" b="11779">clustering</wd>

<space/>

</ln>

<ln l="6125" t="11832" r="8074" b="11990" baseLine="11986" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="11832" r="6446" b="11990">and</wd>

<space/>

<wd l="6504" t="11832" r="7272" b="11990">semantic</wd>

<space/>

<wd l="7339" t="11832" r="8074" b="11990">features.</wd>

</ln>

</para>

<para l="6120" t="12341" r="7992" b="12557" alignment="left" spaceBefore="244" lsp="exactly" lspExact="279" language="en">

<ln l="6120" t="12341" r="7992" b="12557" baseLine="12499" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="12341" r="7992" b="12557">Acknowledgments</wd>

</ln>

</para>

<para l="6115" t="12725" r="10488" b="13685" alignment="justified" spaceBefore="111" lsp="exactly" lspExact="250" language="en">

<ln l="6125" t="12725" r="10483" b="12926" baseLine="12874" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="12725" r="6466" b="12883">The</wd>

<space/>

<wd l="6547" t="12725" r="7282" b="12883">research</wd>

<space/>

<wd l="7363" t="12725" r="7618" b="12883">for</wd>

<space/>

<wd l="7694" t="12725" r="8011" b="12883">this</wd>

<space/>

<wd l="8093" t="12773" r="8587" b="12926">paper</wd>

<space/>

<wd l="8669" t="12773" r="9005" b="12883">was</wd>

<space/>

<wd l="9096" t="12725" r="10032" b="12926">financially</wd>

<space/>

<wd l="10123" t="12773" r="10483" b="12926">sup-</wd>

</ln>

<ln l="6115" t="12974" r="10488" b="13176" baseLine="13128" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6115" t="12974" r="6691" b="13176">ported</wd>

<space/>

<wd l="6749" t="12974" r="6974" b="13176">by</wd>

<space/>

<wd l="7042" t="12974" r="7310" b="13133">the</wd>

<space/>

<wd l="7382" t="12974" r="8314" b="13176">University</wd>

<space/>

<wd l="8386" t="12974" r="8587" b="13133">of</wd>

<space/>

<wd l="8635" t="12974" r="9298" b="13176">Malaya</wd>

<space/>

<wd l="9365" t="12974" r="9907" b="13133">FRGS</wd>

<space/>

<wd l="9989" t="12974" r="10488" b="13133">Grant</wd>

<space/>

</ln>

<ln l="6125" t="13229" r="10483" b="13430" baseLine="13382" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="13229" r="7546" b="13430">(FP021-2014B).</wd>

<space/>

<wd l="7642" t="13234" r="7944" b="13387">We</wd>

<space/>

<wd l="8030" t="13229" r="8525" b="13387">thank</wd>

<space/>

<wd l="8602" t="13229" r="9058" b="13387">Asad</wd>

<space/>

<wd l="9134" t="13229" r="9571" b="13387">Abdi</wd>

<space/>

<wd l="9662" t="13229" r="9917" b="13387">for</wd>

<space/>

<wd l="10003" t="13229" r="10483" b="13387">assis-</wd>

</ln>

<ln l="6120" t="13483" r="9043" b="13685" baseLine="13632" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="13502" r="6586" b="13642">tance</wd>

<space/>

<wd l="6643" t="13483" r="7032" b="13642">with</wd>

<space/>

<wd l="7094" t="13483" r="7901" b="13685">graphical</wd>

<space/>

<wd l="7968" t="13483" r="9043" b="13642">illustrations.</wd>

</ln>

</para>

</column>

</section>

<dd l="5771" t="15736" r="6171" b="15977">

<para l="5804" t="15787" r="6138" b="15946" alignment="left" spaceBefore="4" lsp="exactly" lspExact="229" language="en">

<ln l="5870" t="15787" r="6072" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="22">

<wd l="5870" t="15787" r="6072" b="15946">25</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4303.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1294" marginTop="1420" marginRight="1389" marginBottom="1302" offsetX="-8" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1294" t="1420" r="10520" b="15033">

<column l="1294" t="1420" r="5897" b="14855">

<para l="1416" t="1474" r="2434" b="1642" alignment="left" li="72" ri="72" spaceBefore="6" lsp="exactly" lspExact="273" language="en">

<ln l="1416" t="1474" r="2434" b="1642" baseLine="1637" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="1474" r="2434" b="1642">Reference</wd>

</ln>

</para>

<para l="1416" t="1853" r="5784" b="3187" alignment="justified" li="360" ri="72" spaceBefore="112" fli="-288" lsp="exactly" lspExact="230" language="en">

<ln l="1416" t="1853" r="5784" b="2035" baseLine="1992" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="1853" r="1747" b="2026">Aw,</wd>

<space/>

<wd l="1862" t="1853" r="2098" b="2026">A.,</wd>

<space/>

<wd l="2213" t="1853" r="2765" b="2035">Zhang,</wd>

<space/>

<wd l="2875" t="1858" r="3149" b="2026">M.,</wd>

<space/>

<wd l="3259" t="1853" r="3691" b="2026">Xiao,</wd>

<space/>

<wd l="3806" t="1858" r="3979" b="2026">J.,</wd>

<space/>

<wd l="4094" t="1853" r="4238" b="1997">&amp;</wd>

<space/>

<wd l="4358" t="1853" r="4603" b="2026">Su,</wd>

<space/>

<wd l="4714" t="1858" r="4834" b="1997">J.</wd>

<space/>

<wd l="4954" t="1853" r="5525" b="2035">(2006).</wd>

<space/>

<wd l="5640" t="1853" r="5784" b="1992">A</wd>

<space/>

</ln>

<ln l="1646" t="2083" r="5784" b="2227" baseLine="2222" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="2083" r="2698" b="2227">Phrase-based</wd>

<space/>

<wd l="2774" t="2083" r="3552" b="2227">Statistical</wd>

<space/>

<wd l="3619" t="2083" r="4142" b="2227">Model</wd>

<space/>

<wd l="4214" t="2083" r="4450" b="2227">for</wd>

<space/>

<wd l="4522" t="2083" r="4901" b="2227">SMS</wd>

<space/>

<wd l="4978" t="2088" r="5342" b="2227">Text</wd>

<space/>

<wd l="5405" t="2088" r="5784" b="2227">Nor-</wd>

</ln>

<ln l="1646" t="2314" r="5784" b="2496" baseLine="2448" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="2314" r="2525" b="2458">malization.</wd>

<space/>

<wd l="2640" t="2318" r="2803" b="2453">In</wd>

<space/>

<wd l="2894" t="2314" r="3869" b="2496">Proceedings</wd>

<space/>

<wd l="3974" t="2314" r="4162" b="2458">of</wd>

<space/>

<wd l="4229" t="2314" r="4469" b="2458">the</wd>

<space/>

<wd l="4579" t="2314" r="5784" b="2458">COLING/ACL</wd>

<space/>

</ln>

<ln l="1651" t="2544" r="5774" b="2726" baseLine="2678" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="2587" r="1848" b="2688">on</wd>

<space/>

<wd l="1925" t="2544" r="2347" b="2688">Main</wd>

<space/>

<wd l="2429" t="2544" r="3341" b="2688">Conference</wd>

<space/>

<wd l="3422" t="2549" r="3931" b="2688">Poster</wd>

<space/>

<wd l="4018" t="2544" r="4690" b="2688">Sessions</wd>

<space/>

<wd l="4776" t="2544" r="5078" b="2726">(pp.</wd>

<space/>

<wd l="5174" t="2544" r="5774" b="2726">33–40).</wd>

<space/>

</ln>

<ln l="1656" t="2774" r="5779" b="2957" baseLine="2909" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="2774" r="2664" b="2957">Stroudsburg,</wd>

<space/>

<wd l="2746" t="2774" r="3043" b="2947">PA,</wd>

<space/>

<wd l="3125" t="2774" r="3566" b="2918">USA:</wd>

<space/>

<wd l="3658" t="2774" r="4603" b="2918">Association</wd>

<space/>

<wd l="4680" t="2774" r="4910" b="2918">for</wd>

<space/>

<wd l="4987" t="2774" r="5779" b="2957">Computa-</wd>

</ln>

<ln l="1646" t="3005" r="3077" b="3187" baseLine="3139" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="3005" r="2098" b="3149">tional</wd>

<space/>

<wd l="2150" t="3005" r="3077" b="3187">Linguistics.</wd>

</ln>

</para>

<para l="1416" t="3355" r="5794" b="4915" alignment="justified" li="360" ri="72" spaceBefore="123" fli="-288" lsp="exactly" lspExact="230" language="en">

<ln l="1416" t="3355" r="5774" b="3538" baseLine="3490" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="3355" r="2160" b="3528">Beaufort,</wd>

<space/>

<wd l="2218" t="3360" r="2443" b="3528">R.,</wd>

<space/>

<wd l="2506" t="3355" r="3312" b="3528">Roekhaut,</wd>

<space/>

<wd l="3379" t="3355" r="3576" b="3528">S.,</wd>

<space/>

<wd l="3638" t="3355" r="4406" b="3538">Cougnon,</wd>

<space/>

<wd l="4469" t="3355" r="4944" b="3528">L.-A.,</wd>

<space/>

<wd l="5006" t="3355" r="5150" b="3499">&amp;</wd>

<space/>

<wd l="5208" t="3355" r="5774" b="3528">Fairon,</wd>

<space/>

</ln>

<ln l="1651" t="3586" r="5779" b="3768" baseLine="3720" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="3586" r="1814" b="3730">C.</wd>

<space/>

<wd l="1891" t="3586" r="2458" b="3768">(2010).</wd>

<space/>

<wd l="2530" t="3586" r="2674" b="3725">A</wd>

<space/>

<wd l="2726" t="3586" r="3293" b="3768">Hybrid</wd>

<space/>

<wd l="3350" t="3586" r="4824" b="3730">Rule/Model-based</wd>

<space/>

<wd l="4882" t="3586" r="5779" b="3730">Finite-state</wd>

<space/>

</ln>

<ln l="1646" t="3811" r="5784" b="3994" baseLine="3950" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="3811" r="2568" b="3955">Framework</wd>

<space/>

<wd l="2688" t="3811" r="2918" b="3955">for</wd>

<space/>

<wd l="3024" t="3811" r="4037" b="3994">Normalizing</wd>

<space/>

<wd l="4166" t="3811" r="4546" b="3955">SMS</wd>

<space/>

<wd l="4670" t="3816" r="5486" b="3994">Messages.</wd>

<space/>

<wd l="5621" t="3816" r="5784" b="3950">In</wd>

<space/>

</ln>

<ln l="1646" t="4042" r="5779" b="4224" baseLine="4181" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="4042" r="2616" b="4224">Proceedings</wd>

<space/>

<wd l="2678" t="4042" r="2870" b="4186">of</wd>

<space/>

<wd l="2899" t="4042" r="3139" b="4186">the</wd>

<space/>

<wd l="3197" t="4042" r="3552" b="4186">48th</wd>

<space/>

<wd l="3610" t="4042" r="4195" b="4186">Annual</wd>

<space/>

<wd l="4253" t="4042" r="4915" b="4224">Meeting</wd>

<space/>

<wd l="4978" t="4042" r="5165" b="4186">of</wd>

<space/>

<wd l="5194" t="4042" r="5434" b="4186">the</wd>

<space/>

<wd l="5496" t="4042" r="5779" b="4186">As-</wd>

</ln>

<ln l="1656" t="4272" r="5794" b="4454" baseLine="4411" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="4272" r="2366" b="4416">sociation</wd>

<space/>

<wd l="2448" t="4272" r="2683" b="4416">for</wd>

<space/>

<wd l="2760" t="4272" r="3941" b="4454">Computational</wd>

<space/>

<wd l="4022" t="4272" r="4906" b="4454">Linguistics</wd>

<space/>

<wd l="4992" t="4272" r="5294" b="4454">(pp.</wd>

<space/>

<wd l="5390" t="4272" r="5794" b="4416">770–</wd>

<space/>

</ln>

<ln l="1651" t="4502" r="5789" b="4685" baseLine="4642" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="4502" r="2054" b="4685">779).</wd>

<space/>

<wd l="2222" t="4502" r="3230" b="4685">Stroudsburg,</wd>

<space/>

<wd l="3389" t="4502" r="3686" b="4675">PA,</wd>

<space/>

<wd l="3845" t="4502" r="4291" b="4646">USA:</wd>

<space/>

<wd l="4454" t="4502" r="5400" b="4646">Association</wd>

<space/>

<wd l="5554" t="4502" r="5789" b="4646">for</wd>

<space/>

</ln>

<ln l="1651" t="4733" r="3811" b="4915" baseLine="4872" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="4733" r="2827" b="4915">Computational</wd>

<space/>

<wd l="2885" t="4733" r="3811" b="4915">Linguistics.</wd>

</ln>

</para>

<para l="1416" t="5083" r="5779" b="5957" alignment="justified" li="360" ri="72" spaceBefore="117" fli="-288" lsp="exactly" lspExact="230" language="en">

<ln l="1416" t="5083" r="5779" b="5266" baseLine="5222" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="5088" r="2006" b="5266">Berger,</wd>

<space/>

<wd l="2074" t="5083" r="2251" b="5227">A.</wd>

<space/>

<wd l="2323" t="5088" r="2539" b="5256">L.,</wd>

<space/>

<wd l="2606" t="5083" r="3115" b="5256">Pietra,</wd>

<space/>

<wd l="3182" t="5088" r="3365" b="5227">V.</wd>

<space/>

<wd l="3437" t="5088" r="3552" b="5227">J.</wd>

<space/>

<wd l="3624" t="5083" r="4094" b="5256">Della,</wd>

<space/>

<wd l="4166" t="5083" r="4310" b="5227">&amp;</wd>

<space/>

<wd l="4378" t="5083" r="4886" b="5256">Pietra,</wd>

<space/>

<wd l="4963" t="5083" r="5102" b="5227">S.</wd>

<space/>

<wd l="5174" t="5083" r="5357" b="5227">A.</wd>

<space/>

<wd l="5429" t="5083" r="5779" b="5227">Del-</wd>

</ln>

<ln l="1651" t="5314" r="5779" b="5496" baseLine="5448" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="5314" r="1829" b="5458">la.</wd>

<space/>

<wd l="1906" t="5314" r="2477" b="5496">(1996).</wd>

<space/>

<wd l="2549" t="5314" r="2693" b="5453">A</wd>

<space/>

<wd l="2755" t="5314" r="3590" b="5458">Maximum</wd>

<space/>

<wd l="3648" t="5318" r="4296" b="5496">Entropy</wd>

<space/>

<wd l="4358" t="5314" r="5146" b="5496">Approach</wd>

<space/>

<wd l="5208" t="5333" r="5357" b="5458">to</wd>

<space/>

<wd l="5419" t="5318" r="5779" b="5458">Nat-</wd>

</ln>

<ln l="1646" t="5544" r="5774" b="5726" baseLine="5678" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="5544" r="1954" b="5688">ural</wd>

<space/>

<wd l="2122" t="5549" r="2904" b="5726">Language</wd>

<space/>

<wd l="3072" t="5544" r="3979" b="5726">Processing.</wd>

<space/>

<wd l="4162" t="5544" r="4838" b="5726">Comput.</wd>

<space/>

<wd l="5016" t="5544" r="5774" b="5726">Linguist.,</wd>

<space/>

</ln>

<ln l="1646" t="5774" r="2717" b="5957" baseLine="5909" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="5774" r="2122" b="5957">22(1),</wd>

<space/>

<wd l="2184" t="5774" r="2717" b="5918">39–71.</wd>

</ln>

</para>

<para l="1416" t="6125" r="5784" b="6998" alignment="justified" li="360" ri="72" spaceBefore="121" fli="-288" lsp="exactly" lspExact="230" language="en">

<ln l="1416" t="6125" r="5770" b="6307" baseLine="6259" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="6125" r="2405" b="6307">Bieswanger,</wd>

<space/>

<wd l="2472" t="6130" r="2688" b="6269">M.</wd>

<space/>

<wd l="2765" t="6125" r="3331" b="6307">(2007).</wd>

<space/>

<wd l="3403" t="6125" r="3494" b="6264">2</wd>

<space/>

<wd l="3571" t="6125" r="4253" b="6269">abbrevi8</wd>

<space/>

<wd l="4330" t="6168" r="4498" b="6269">or</wd>

<space/>

<wd l="4550" t="6144" r="4810" b="6269">not</wd>

<space/>

<wd l="4867" t="6125" r="4958" b="6264">2</wd>

<space/>

<wd l="5030" t="6125" r="5770" b="6269">abbrevi8:</wd>

<space/>

</ln>

<ln l="1646" t="6355" r="5784" b="6538" baseLine="6490" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="6355" r="1790" b="6494">A</wd>

<space/>

<wd l="1901" t="6355" r="2813" b="6499">Contrastive</wd>

<space/>

<wd l="2923" t="6355" r="3619" b="6538">Analysis</wd>

<space/>

<wd l="3734" t="6355" r="3922" b="6499">of</wd>

<space/>

<wd l="4003" t="6355" r="4738" b="6499">Different</wd>

<space/>

<wd l="4853" t="6355" r="5386" b="6538">Space-</wd>

<space/>

<wd l="5501" t="6355" r="5784" b="6499">and</wd>

<space/>

</ln>

<ln l="1651" t="6586" r="5784" b="6768" baseLine="6720" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="6586" r="2683" b="6768">Time-Saving</wd>

<space/>

<wd l="2818" t="6586" r="3590" b="6768">Strategies</wd>

<space/>

<wd l="3725" t="6586" r="3878" b="6725">in</wd>

<space/>

<wd l="3998" t="6586" r="4608" b="6768">English</wd>

<space/>

<wd l="4733" t="6586" r="5016" b="6730">and</wd>

<space/>

<wd l="5146" t="6586" r="5784" b="6730">German</wd>

<space/>

</ln>

<ln l="1651" t="6816" r="5602" b="6998" baseLine="6950" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="6821" r="2016" b="6960">Text</wd>

<space/>

<wd l="2064" t="6821" r="2875" b="6998">Messages.</wd>

<space/>

<wd l="2942" t="6821" r="3413" b="6960">Texas</wd>

<space/>

<wd l="3466" t="6816" r="4277" b="6998">Linguistic</wd>

<space/>

<wd l="4330" t="6821" r="4901" b="6989">Forum,</wd>

<space/>

<wd l="4958" t="6816" r="5299" b="6960">Vol.</wd>

<space/>

<wd l="5371" t="6816" r="5602" b="6960">50.</wd>

</ln>

</para>

<para l="1421" t="7166" r="5779" b="7810" alignment="justified" li="360" ri="72" spaceBefore="122" fli="-288" lsp="exactly" lspExact="230" language="en">

<ln l="1421" t="7166" r="5779" b="7349" baseLine="7301" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="7166" r="2357" b="7349">Choudhury,</wd>

<space/>

<wd l="2477" t="7171" r="2746" b="7339">M.,</wd>

<space/>

<wd l="2875" t="7166" r="3326" b="7339">Saraf,</wd>

<space/>

<wd l="3446" t="7171" r="3672" b="7339">R.,</wd>

<space/>

<wd l="3787" t="7166" r="4152" b="7339">Jain,</wd>

<space/>

<wd l="4272" t="7171" r="4507" b="7339">V.,</wd>

<space/>

<wd l="4637" t="7166" r="5189" b="7339">Sarkar,</wd>

<space/>

<wd l="5318" t="7166" r="5510" b="7339">S.,</wd>

<space/>

<wd l="5635" t="7166" r="5779" b="7310">&amp;</wd>

<space/>

</ln>

<ln l="1646" t="7397" r="5779" b="7579" baseLine="7531" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="7402" r="2088" b="7570">Basu,</wd>

<space/>

<wd l="2150" t="7397" r="2333" b="7541">A.</wd>

<space/>

<wd l="2405" t="7397" r="2976" b="7579">(2007).</wd>

<space/>

<wd l="3048" t="7397" r="4085" b="7579">Investigation</wd>

<space/>

<wd l="4142" t="7397" r="4426" b="7541">and</wd>

<space/>

<wd l="4483" t="7397" r="5256" b="7579">Modeling</wd>

<space/>

<wd l="5318" t="7397" r="5506" b="7541">of</wd>

<space/>

<wd l="5539" t="7397" r="5779" b="7541">the</wd>

<space/>

</ln>

<ln l="1656" t="7627" r="4747" b="7810" baseLine="7762" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="7627" r="2376" b="7771">Structure</wd>

<space/>

<wd l="2434" t="7627" r="2621" b="7771">of</wd>

<space/>

<wd l="2650" t="7627" r="3264" b="7810">Texting</wd>

<space/>

<wd l="3317" t="7632" r="4147" b="7810">Language,</wd>

<space/>

<wd l="4210" t="7627" r="4747" b="7771">63–70.</wd>

</ln>

</para>

<para l="1421" t="7978" r="5784" b="9038" alignment="justified" li="360" ri="72" spaceBefore="122" fli="-288" lsp="exactly" lspExact="230" language="en">

<ln l="1421" t="7978" r="5784" b="8160" baseLine="8112" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="7978" r="1901" b="8150">Clark,</wd>

<space/>

<wd l="1978" t="7982" r="2194" b="8150">E.,</wd>

<space/>

<wd l="2270" t="7978" r="2414" b="8122">&amp;</wd>

<space/>

<wd l="2491" t="7978" r="2986" b="8150">Araki,</wd>

<space/>

<wd l="3058" t="7982" r="3240" b="8122">K.</wd>

<space/>

<wd l="3326" t="7978" r="3893" b="8160">(2011).</wd>

<space/>

<wd l="3974" t="7982" r="4339" b="8122">Text</wd>

<space/>

<wd l="4402" t="7978" r="5558" b="8122">Normalization</wd>

<space/>

<wd l="5630" t="7978" r="5784" b="8117">in</wd>

<space/>

</ln>

<ln l="1656" t="8203" r="5774" b="8386" baseLine="8338" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="8203" r="2141" b="8347">Social</wd>

<space/>

<wd l="2198" t="8203" r="2750" b="8347">Media:</wd>

<space/>

<wd l="2818" t="8208" r="3547" b="8386">Progress,</wd>

<space/>

<wd l="3610" t="8203" r="4358" b="8347">Problems</wd>

<space/>

<wd l="4421" t="8203" r="4704" b="8347">and</wd>

<space/>

<wd l="4762" t="8203" r="5774" b="8386">Applications</wd>

<space/>

</ln>

<ln l="1651" t="8434" r="5770" b="8616" baseLine="8573" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="8434" r="1882" b="8578">for</wd>

<space/>

<wd l="1992" t="8477" r="2078" b="8578">a</wd>

<space/>

<wd l="2189" t="8434" r="3379" b="8616">Pre-Processing</wd>

<space/>

<wd l="3504" t="8434" r="4085" b="8616">System</wd>

<space/>

<wd l="4200" t="8434" r="4387" b="8578">of</wd>

<space/>

<wd l="4474" t="8434" r="5006" b="8578">Casual</wd>

<space/>

<wd l="5122" t="8434" r="5770" b="8616">English.</wd>

<space/>

</ln>

<ln l="1646" t="8664" r="5779" b="8846" baseLine="8803" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="8664" r="2347" b="8808">Procedia</wd>

<space/>

<wd l="2429" t="8750" r="2486" b="8770">-</wd>

<space/>

<wd l="2578" t="8664" r="3067" b="8808">Social</wd>

<space/>

<wd l="3154" t="8664" r="3437" b="8808">and</wd>

<space/>

<wd l="3514" t="8664" r="4392" b="8808">Behavioral</wd>

<space/>

<wd l="4483" t="8664" r="5213" b="8837">Sciences,</wd>

<space/>

<wd l="5299" t="8664" r="5779" b="8846">27(0),</wd>

<space/>

</ln>

<ln l="1646" t="8894" r="2088" b="9038" baseLine="9034" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="8894" r="2088" b="9038">2–11.</wd>

</ln>

</para>

<para l="1421" t="9245" r="5789" b="10579" alignment="justified" li="360" ri="72" spaceBefore="117" fli="-288" lsp="exactly" lspExact="230" language="en">

<ln l="1421" t="9245" r="5770" b="9427" baseLine="9384" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="9245" r="2314" b="9418">Contractor,</wd>

<space/>

<wd l="2400" t="9250" r="2640" b="9418">D.,</wd>

<space/>

<wd l="2726" t="9245" r="3475" b="9427">Faruquie,</wd>

<space/>

<wd l="3566" t="9250" r="3725" b="9389">T.</wd>

<space/>

<wd l="3816" t="9245" r="4051" b="9418">A.,</wd>

<space/>

<wd l="4142" t="9245" r="4286" b="9389">&amp;</wd>

<space/>

<wd l="4378" t="9245" r="5525" b="9418">Subramaniam,</wd>

<space/>

<wd l="5611" t="9250" r="5770" b="9389">L.</wd>

<space/>

</ln>

<ln l="1646" t="9475" r="5770" b="9658" baseLine="9614" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="9480" r="1829" b="9619">V.</wd>

<space/>

<wd l="1910" t="9475" r="2482" b="9658">(2010).</wd>

<space/>

<wd l="2558" t="9475" r="3653" b="9658">Unsupervised</wd>

<space/>

<wd l="3725" t="9475" r="4517" b="9658">Cleansing</wd>

<space/>

<wd l="4594" t="9475" r="4781" b="9619">of</wd>

<space/>

<wd l="4814" t="9475" r="5299" b="9658">Noisy</wd>

<space/>

<wd l="5366" t="9480" r="5770" b="9619">Text.</wd>

<space/>

</ln>

<ln l="1651" t="9706" r="5779" b="9888" baseLine="9840" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="9710" r="1814" b="9845">In</wd>

<space/>

<wd l="1906" t="9706" r="2880" b="9888">Proceedings</wd>

<space/>

<wd l="2986" t="9706" r="3173" b="9850">of</wd>

<space/>

<wd l="3240" t="9706" r="3480" b="9850">the</wd>

<space/>

<wd l="3581" t="9706" r="3950" b="9850">23rd</wd>

<space/>

<wd l="4051" t="9706" r="5064" b="9850">International</wd>

<space/>

<wd l="5165" t="9706" r="5779" b="9850">Confer-</wd>

</ln>

<ln l="1651" t="9936" r="5774" b="10118" baseLine="10070" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="9979" r="2006" b="10080">ence</wd>

<space/>

<wd l="2126" t="9979" r="2323" b="10080">on</wd>

<space/>

<wd l="2434" t="9936" r="3614" b="10118">Computational</wd>

<space/>

<wd l="3725" t="9936" r="4656" b="10118">Linguistics:</wd>

<space/>

<wd l="4781" t="9941" r="5352" b="10080">Posters</wd>

<space/>

<wd l="5472" t="9936" r="5774" b="10118">(pp.</wd>

<space/>

</ln>

<ln l="1666" t="10166" r="5789" b="10349" baseLine="10301" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1666" t="10166" r="2453" b="10349">189–196).</wd>

<space/>

<wd l="2544" t="10166" r="3552" b="10349">Stroudsburg,</wd>

<space/>

<wd l="3634" t="10166" r="3931" b="10339">PA,</wd>

<space/>

<wd l="4008" t="10166" r="4450" b="10310">USA:</wd>

<space/>

<wd l="4536" t="10166" r="5482" b="10310">Association</wd>

<space/>

<wd l="5554" t="10166" r="5789" b="10310">for</wd>

<space/>

</ln>

<ln l="1651" t="10397" r="3811" b="10579" baseLine="10531" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="10397" r="2827" b="10579">Computational</wd>

<space/>

<wd l="2885" t="10397" r="3811" b="10579">Linguistics.</wd>

</ln>

</para>

<para l="1421" t="10747" r="5784" b="12077" alignment="justified" li="360" ri="72" spaceBefore="123" fli="-288" lsp="exactly" lspExact="230" language="en">

<ln l="1421" t="10747" r="5784" b="10930" baseLine="10882" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="10747" r="1891" b="10920">Cook,</wd>

<space/>

<wd l="1978" t="10752" r="2184" b="10920">P.,</wd>

<space/>

<wd l="2275" t="10747" r="2419" b="10891">&amp;</wd>

<space/>

<wd l="2515" t="10747" r="3370" b="10920">Stevenson,</wd>

<space/>

<wd l="3466" t="10747" r="3605" b="10891">S.</wd>

<space/>

<wd l="3701" t="10747" r="4272" b="10930">(2009).</wd>

<space/>

<wd l="4363" t="10747" r="4603" b="10886">An</wd>

<space/>

<wd l="4685" t="10747" r="5784" b="10930">Unsupervised</wd>

<space/>

</ln>

<ln l="1646" t="10978" r="5779" b="11160" baseLine="11112" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="10978" r="2165" b="11122">Model</wd>

<space/>

<wd l="2275" t="10978" r="2506" b="11122">for</wd>

<space/>

<wd l="2606" t="10982" r="2971" b="11122">Text</wd>

<space/>

<wd l="3067" t="10982" r="3763" b="11160">Message</wd>

<space/>

<wd l="3859" t="10978" r="5059" b="11122">Normalization.</wd>

<space/>

<wd l="5179" t="10982" r="5342" b="11117">In</wd>

<space/>

<wd l="5438" t="10982" r="5779" b="11122">Pro-</wd>

</ln>

<ln l="1651" t="11203" r="5779" b="11386" baseLine="11342" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="11203" r="2338" b="11386">ceedings</wd>

<space/>

<wd l="2429" t="11203" r="2616" b="11347">of</wd>

<space/>

<wd l="2674" t="11203" r="2914" b="11347">the</wd>

<space/>

<wd l="3000" t="11203" r="3830" b="11386">Workshop</wd>

<space/>

<wd l="3922" t="11246" r="4118" b="11347">on</wd>

<space/>

<wd l="4205" t="11203" r="5386" b="11386">Computational</wd>

<space/>

<wd l="5472" t="11203" r="5779" b="11386">Ap-</wd>

</ln>

<ln l="1642" t="11434" r="5774" b="11616" baseLine="11573" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="11434" r="2352" b="11616">proaches</wd>

<space/>

<wd l="2501" t="11453" r="2654" b="11578">to</wd>

<space/>

<wd l="2803" t="11434" r="3610" b="11616">Linguistic</wd>

<space/>

<wd l="3763" t="11434" r="4560" b="11616">Creativity</wd>

<space/>

<wd l="4709" t="11434" r="5011" b="11616">(pp.</wd>

<space/>

<wd l="5174" t="11434" r="5774" b="11616">71–78).</wd>

<space/>

</ln>

<ln l="1656" t="11664" r="5779" b="11846" baseLine="11803" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="11664" r="2664" b="11846">Stroudsburg,</wd>

<space/>

<wd l="2746" t="11664" r="3043" b="11837">PA,</wd>

<space/>

<wd l="3125" t="11664" r="3566" b="11808">USA:</wd>

<space/>

<wd l="3658" t="11664" r="4603" b="11808">Association</wd>

<space/>

<wd l="4680" t="11664" r="4910" b="11808">for</wd>

<space/>

<wd l="4987" t="11664" r="5779" b="11846">Computa-</wd>

</ln>

<ln l="1646" t="11894" r="3077" b="12077" baseLine="12034" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="11894" r="2098" b="12038">tional</wd>

<space/>

<wd l="2150" t="11894" r="3077" b="12077">Linguistics.</wd>

</ln>

</para>

<para l="1416" t="12245" r="5779" b="13118" alignment="justified" li="360" ri="72" spaceBefore="116" fli="-288" lsp="exactly" lspExact="230" language="en">

<ln l="1416" t="12245" r="5774" b="12427" baseLine="12384" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="12245" r="2304" b="12427">Daugherty,</wd>

<space/>

<wd l="2414" t="12250" r="2626" b="12418">T.,</wd>

<space/>

<wd l="2731" t="12245" r="3274" b="12418">Eastin,</wd>

<space/>

<wd l="3379" t="12250" r="3595" b="12389">M.</wd>

<space/>

<wd l="3715" t="12245" r="3912" b="12418">S.,</wd>

<space/>

<wd l="4022" t="12245" r="4166" b="12389">&amp;</wd>

<space/>

<wd l="4267" t="12245" r="4824" b="12427">Bright,</wd>

<space/>

<wd l="4930" t="12250" r="5088" b="12389">L.</wd>

<space/>

<wd l="5203" t="12245" r="5774" b="12427">(2008).</wd>

<space/>

</ln>

<ln l="1646" t="12475" r="5779" b="12658" baseLine="12610" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="12475" r="2438" b="12658">Exploring</wd>

<space/>

<wd l="2515" t="12475" r="3336" b="12619">Consumer</wd>

<space/>

<wd l="3398" t="12475" r="4363" b="12619">Motivations</wd>

<space/>

<wd l="4440" t="12475" r="4675" b="12619">for</wd>

<space/>

<wd l="4742" t="12475" r="5419" b="12658">Creating</wd>

<space/>

<wd l="5491" t="12480" r="5779" b="12619">Us-</wd>

</ln>

<ln l="1651" t="12706" r="5779" b="12850" baseLine="12840" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="12706" r="2688" b="12850">er-Generated</wd>

<space/>

<wd l="2789" t="12706" r="3456" b="12850">Content.</wd>

<space/>

<wd l="3566" t="12706" r="4152" b="12850">Journal</wd>

<space/>

<wd l="4258" t="12706" r="4445" b="12850">of</wd>

<space/>

<wd l="4522" t="12706" r="5366" b="12850">Interactive</wd>

<space/>

<wd l="5472" t="12706" r="5779" b="12850">Ad-</wd>

</ln>

<ln l="1646" t="12936" r="2818" b="13118" baseLine="13070" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="12936" r="2386" b="13118">vertising,</wd>

<space/>

<wd l="2453" t="12936" r="2818" b="13118">8(2).</wd>

</ln>

</para>

<para l="1421" t="13286" r="5784" b="14808" alignment="justified" li="360" ri="72" spaceBefore="123" fli="-288" lsp="exactly" lspExact="228" language="en">

<ln l="1421" t="13286" r="5774" b="13469" baseLine="13421" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="13286" r="1982" b="13459">Gadde,</wd>

<space/>

<wd l="2045" t="13291" r="2251" b="13459">P.,</wd>

<space/>

<wd l="2318" t="13286" r="2995" b="13459">Goutam,</wd>

<space/>

<wd l="3058" t="13291" r="3283" b="13459">R.,</wd>

<space/>

<wd l="3355" t="13286" r="3787" b="13459">Shah,</wd>

<space/>

<wd l="3854" t="13291" r="4080" b="13459">R.,</wd>

<space/>

<wd l="4142" t="13291" r="5050" b="13469">Bayyarapu,</wd>

<space/>

<wd l="5112" t="13291" r="5294" b="13430">H.</wd>

<space/>

<wd l="5371" t="13286" r="5563" b="13459">S.,</wd>

<space/>

<wd l="5630" t="13286" r="5774" b="13430">&amp;</wd>

<space/>

</ln>

<ln l="1656" t="13517" r="5779" b="13699" baseLine="13651" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="13517" r="2798" b="13690">Subramaniam,</wd>

<space/>

<wd l="2870" t="13522" r="3029" b="13661">L.</wd>

<space/>

<wd l="3106" t="13522" r="3288" b="13661">V.</wd>

<space/>

<wd l="3370" t="13517" r="3936" b="13699">(2011).</wd>

<space/>

<wd l="4013" t="13517" r="5016" b="13699">Experiments</wd>

<space/>

<wd l="5088" t="13517" r="5443" b="13661">with</wd>

<space/>

<wd l="5506" t="13517" r="5779" b="13656">Ar-</wd>

</ln>

<ln l="1646" t="13747" r="5784" b="13930" baseLine="13882" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="13747" r="2323" b="13930">tificially</wd>

<space/>

<wd l="2458" t="13747" r="3274" b="13891">Generated</wd>

<space/>

<wd l="3403" t="13747" r="3869" b="13891">Noise</wd>

<space/>

<wd l="4013" t="13747" r="4248" b="13891">for</wd>

<space/>

<wd l="4378" t="13747" r="5170" b="13930">Cleansing</wd>

<space/>

<wd l="5299" t="13747" r="5784" b="13930">Noisy</wd>

<space/>

</ln>

<ln l="1651" t="13973" r="5784" b="14155" baseLine="14112" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="13978" r="2050" b="14117">Text.</wd>

<space/>

<wd l="2146" t="13978" r="2309" b="14112">In</wd>

<space/>

<wd l="2386" t="13973" r="3355" b="14155">Proceedings</wd>

<space/>

<wd l="3442" t="13973" r="3629" b="14117">of</wd>

<space/>

<wd l="3686" t="13973" r="3926" b="14117">the</wd>

<space/>

<wd l="4008" t="13973" r="4387" b="14117">2011</wd>

<space/>

<wd l="4488" t="13973" r="4877" b="14117">Joint</wd>

<space/>

<wd l="4954" t="13973" r="5784" b="14155">Workshop</wd>

<space/>

</ln>

<ln l="1651" t="14203" r="5779" b="14386" baseLine="14342" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="14246" r="1848" b="14347">on</wd>

<space/>

<wd l="1910" t="14203" r="2904" b="14386">Multilingual</wd>

<space/>

<wd l="2976" t="14203" r="3389" b="14347">OCR</wd>

<space/>

<wd l="3451" t="14203" r="3734" b="14347">and</wd>

<space/>

<wd l="3802" t="14203" r="4565" b="14386">Analytics</wd>

<space/>

<wd l="4637" t="14203" r="4872" b="14347">for</wd>

<space/>

<wd l="4925" t="14203" r="5410" b="14386">Noisy</wd>

<space/>

<wd l="5472" t="14208" r="5779" b="14347">Un-</wd>

</ln>

<ln l="1656" t="14434" r="5774" b="14616" baseLine="14573" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="14434" r="2443" b="14578">structured</wd>

<space/>

<wd l="2501" t="14438" r="2866" b="14578">Text</wd>

<space/>

<wd l="2918" t="14438" r="3298" b="14578">Data</wd>

<space/>

<wd l="3355" t="14434" r="3658" b="14616">(pp.</wd>

<space/>

<wd l="3720" t="14434" r="4435" b="14616">4:1–4:8).</wd>

<space/>

<wd l="4498" t="14438" r="4882" b="14578">New</wd>

<space/>

<wd l="4930" t="14434" r="5381" b="14606">York,</wd>

<space/>

<wd l="5438" t="14438" r="5774" b="14606">NY,</wd>

<space/>

</ln>

<ln l="1646" t="14664" r="5165" b="14808" baseLine="14803" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="14664" r="2088" b="14808">USA:</wd>

<space/>

<wd l="2150" t="14664" r="2645" b="14808">ACM.</wd>

<space/>

<wd l="2712" t="14664" r="5165" b="14808">doi:10.1145/2034617.2034622</wd>

</ln>

</para>

</column>

<column l="6118" t="1420" r="10520" b="15033">

<para l="6120" t="1459" r="10488" b="3024" alignment="justified" li="216" fli="-216" lsp="exactly" lspExact="229" language="en">

<ln l="6120" t="1459" r="10488" b="1642" baseLine="1598" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="1464" r="6494" b="1632">Han,</wd>

<space/>

<wd l="6557" t="1464" r="6782" b="1632">B.,</wd>

<space/>

<wd l="6850" t="1459" r="6994" b="1603">&amp;</wd>

<space/>

<wd l="7051" t="1459" r="7771" b="1632">Baldwin,</wd>

<space/>

<wd l="7834" t="1464" r="7992" b="1603">T.</wd>

<space/>

<wd l="8059" t="1459" r="8626" b="1642">(2011).</wd>

<space/>

<wd l="8693" t="1459" r="9283" b="1603">Lexical</wd>

<space/>

<wd l="9336" t="1459" r="10488" b="1603">Normalisation</wd>

<space/>

</ln>

<ln l="6355" t="1690" r="10488" b="1872" baseLine="1829" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="1690" r="6542" b="1834">of</wd>

<space/>

<wd l="6586" t="1690" r="7013" b="1834">Short</wd>

<space/>

<wd l="7075" t="1694" r="7440" b="1834">Text</wd>

<space/>

<wd l="7502" t="1694" r="8323" b="1872">Messages:</wd>

<space/>

<wd l="8395" t="1690" r="8866" b="1834">Makn</wd>

<space/>

<wd l="8938" t="1690" r="9298" b="1834">Sens</wd>

<space/>

<wd l="9370" t="1733" r="9456" b="1834">a</wd>

<space/>

<wd l="9518" t="1690" r="10243" b="1834">#Twitter.</wd>

<space/>

<wd l="10325" t="1694" r="10488" b="1829">In</wd>

<space/>

</ln>

<ln l="6350" t="1920" r="10483" b="2102" baseLine="2054" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="1920" r="7320" b="2102">Proceedings</wd>

<space/>

<wd l="7387" t="1920" r="7574" b="2064">of</wd>

<space/>

<wd l="7603" t="1920" r="7843" b="2064">the</wd>

<space/>

<wd l="7901" t="1920" r="8261" b="2064">49th</wd>

<space/>

<wd l="8314" t="1920" r="8899" b="2064">Annual</wd>

<space/>

<wd l="8957" t="1920" r="9619" b="2102">Meeting</wd>

<space/>

<wd l="9682" t="1920" r="9869" b="2064">of</wd>

<space/>

<wd l="9898" t="1920" r="10142" b="2064">the</wd>

<space/>

<wd l="10200" t="1920" r="10483" b="2064">As-</wd>

</ln>

<ln l="6360" t="2150" r="10488" b="2333" baseLine="2285" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6360" t="2150" r="7070" b="2294">sociation</wd>

<space/>

<wd l="7190" t="2150" r="7426" b="2294">for</wd>

<space/>

<wd l="7541" t="2150" r="8717" b="2333">Computational</wd>

<space/>

<wd l="8837" t="2150" r="9768" b="2333">Linguistics:</wd>

<space/>

<wd l="9898" t="2155" r="10488" b="2294">Human</wd>

<space/>

</ln>

<ln l="6350" t="2381" r="10478" b="2563" baseLine="2515" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="2386" r="7133" b="2563">Language</wd>

<space/>

<wd l="7195" t="2381" r="8261" b="2563">Technologies</wd>

<space/>

<wd l="8333" t="2467" r="8390" b="2486">-</wd>

<space/>

<wd l="8443" t="2381" r="9086" b="2525">Volume</wd>

<space/>

<wd l="9163" t="2381" r="9221" b="2520">1</wd>

<space/>

<wd l="9302" t="2381" r="9605" b="2563">(pp.</wd>

<space/>

<wd l="9677" t="2381" r="10478" b="2563">368–378).</wd>

<space/>

</ln>

<ln l="6360" t="2611" r="10483" b="2794" baseLine="2746" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6360" t="2611" r="7368" b="2794">Stroudsburg,</wd>

<space/>

<wd l="7450" t="2611" r="7747" b="2784">PA,</wd>

<space/>

<wd l="7829" t="2611" r="8270" b="2755">USA:</wd>

<space/>

<wd l="8362" t="2611" r="9307" b="2755">Association</wd>

<space/>

<wd l="9384" t="2611" r="9614" b="2755">for</wd>

<space/>

<wd l="9691" t="2611" r="10483" b="2794">Computa-</wd>

</ln>

<ln l="6350" t="2842" r="7781" b="3024" baseLine="2976" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="2842" r="6802" b="2986">tional</wd>

<space/>

<wd l="6854" t="2842" r="7781" b="3024">Linguistics.</wd>

</ln>

</para>

<para l="6120" t="3192" r="10493" b="4757" alignment="justified" li="216" spaceBefore="123" fli="-216" lsp="exactly" lspExact="230" language="en">

<ln l="6120" t="3192" r="10478" b="3374" baseLine="3326" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="3197" r="6494" b="3365">Han,</wd>

<space/>

<wd l="6576" t="3197" r="6806" b="3365">B.,</wd>

<space/>

<wd l="6893" t="3192" r="7363" b="3365">Cook,</wd>

<space/>

<wd l="7445" t="3197" r="7651" b="3365">P.,</wd>

<space/>

<wd l="7738" t="3192" r="7882" b="3336">&amp;</wd>

<space/>

<wd l="7958" t="3192" r="8678" b="3365">Baldwin,</wd>

<space/>

<wd l="8765" t="3197" r="8923" b="3336">T.</wd>

<space/>

<wd l="9014" t="3192" r="9581" b="3374">(2012).</wd>

<space/>

<wd l="9667" t="3192" r="10478" b="3336">Automati-</wd>

</ln>

<ln l="6355" t="3422" r="10493" b="3605" baseLine="3557" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="3422" r="6739" b="3605">cally</wd>

<space/>

<wd l="6821" t="3422" r="7843" b="3605">Constructing</wd>

<space/>

<wd l="7930" t="3466" r="8016" b="3566">a</wd>

<space/>

<wd l="8088" t="3422" r="9240" b="3566">Normalisation</wd>

<space/>

<wd l="9317" t="3422" r="10176" b="3605">Dictionary</wd>

<space/>

<wd l="10258" t="3422" r="10493" b="3566">for</wd>

<space/>

</ln>

<ln l="6350" t="3653" r="10483" b="3835" baseLine="3787" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="3653" r="7310" b="3835">Microblogs.</wd>

<space/>

<wd l="7387" t="3658" r="7550" b="3792">In</wd>

<space/>

<wd l="7608" t="3653" r="8578" b="3835">Proceedings</wd>

<space/>

<wd l="8650" t="3653" r="8837" b="3797">of</wd>

<space/>

<wd l="8870" t="3653" r="9115" b="3797">the</wd>

<space/>

<wd l="9178" t="3653" r="9571" b="3797">2012</wd>

<space/>

<wd l="9638" t="3653" r="10032" b="3797">Joint</wd>

<space/>

<wd l="10094" t="3653" r="10483" b="3797">Con-</wd>

</ln>

<ln l="6355" t="3883" r="10483" b="4066" baseLine="4018" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="3883" r="6931" b="4027">ference</wd>

<space/>

<wd l="6994" t="3926" r="7190" b="4027">on</wd>

<space/>

<wd l="7243" t="3883" r="8030" b="4066">Empirical</wd>

<space/>

<wd l="8083" t="3883" r="8779" b="4027">Methods</wd>

<space/>

<wd l="8846" t="3883" r="8995" b="4022">in</wd>

<space/>

<wd l="9043" t="3883" r="9643" b="4027">Natural</wd>

<space/>

<wd l="9701" t="3888" r="10483" b="4066">Language</wd>

<space/>

</ln>

<ln l="6350" t="4114" r="10483" b="4296" baseLine="4248" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="4114" r="7210" b="4296">Processing</wd>

<space/>

<wd l="7320" t="4114" r="7603" b="4258">and</wd>

<space/>

<wd l="7714" t="4114" r="8894" b="4296">Computational</wd>

<space/>

<wd l="8990" t="4114" r="9590" b="4258">Natural</wd>

<space/>

<wd l="9696" t="4118" r="10483" b="4296">Language</wd>

<space/>

</ln>

<ln l="6350" t="4344" r="10474" b="4526" baseLine="4478" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="4344" r="7066" b="4526">Learning</wd>

<space/>

<wd l="7171" t="4344" r="7474" b="4526">(pp.</wd>

<space/>

<wd l="7584" t="4344" r="8390" b="4526">421–432).</wd>

<space/>

<wd l="8510" t="4344" r="9518" b="4526">Stroudsburg,</wd>

<space/>

<wd l="9624" t="4344" r="9926" b="4517">PA,</wd>

<space/>

<wd l="10032" t="4344" r="10474" b="4488">USA:</wd>

<space/>

</ln>

<ln l="6350" t="4574" r="9792" b="4757" baseLine="4709" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="4574" r="7296" b="4718">Association</wd>

<space/>

<wd l="7349" t="4574" r="7579" b="4718">for</wd>

<space/>

<wd l="7632" t="4574" r="8813" b="4757">Computational</wd>

<space/>

<wd l="8866" t="4574" r="9792" b="4757">Linguistics.</wd>

</ln>

</para>

<para l="6120" t="4920" r="10483" b="5755" alignment="justified" li="216" spaceBefore="121" fli="-216" lsp="exactly" lspExact="230" language="en">

<ln l="6120" t="4920" r="10474" b="5102" baseLine="5059" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="4920" r="6730" b="5093">Jelinek,</wd>

<space/>

<wd l="6821" t="4925" r="6970" b="5064">F.</wd>

<space/>

<wd l="7070" t="4920" r="7637" b="5102">(1990).</wd>

<space/>

<wd l="7733" t="4920" r="8472" b="5102">Readings</wd>

<space/>

<wd l="8568" t="4920" r="8717" b="5059">in</wd>

<space/>

<wd l="8808" t="4920" r="9379" b="5102">Speech</wd>

<space/>

<wd l="9461" t="4920" r="10474" b="5102">Recognition.</wd>

<space/>

</ln>

<ln l="6355" t="5150" r="10478" b="5333" baseLine="5290" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="5155" r="6518" b="5290">In</wd>

<space/>

<wd l="6600" t="5150" r="6778" b="5294">A.</wd>

<space/>

<wd l="6874" t="5150" r="7450" b="5294">Waibel</wd>

<space/>

<wd l="7536" t="5150" r="7680" b="5294">&amp;</wd>

<space/>

<wd l="7771" t="5155" r="8179" b="5294">K.-F.</wd>

<space/>

<wd l="8275" t="5155" r="8568" b="5294">Lee</wd>

<space/>

<wd l="8659" t="5150" r="9182" b="5333">(Eds.),</wd>

<space/>

<wd l="9278" t="5150" r="9581" b="5333">(pp.</wd>

<space/>

<wd l="9672" t="5150" r="10478" b="5333">450–506).</wd>

<space/>

</ln>

<ln l="6360" t="5381" r="10483" b="5563" baseLine="5520" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6360" t="5381" r="6643" b="5525">San</wd>

<space/>

<wd l="6710" t="5381" r="7531" b="5554">Francisco,</wd>

<space/>

<wd l="7603" t="5381" r="7920" b="5554">CA,</wd>

<space/>

<wd l="7987" t="5381" r="8434" b="5525">USA:</wd>

<space/>

<wd l="8506" t="5386" r="9134" b="5563">Morgan</wd>

<space/>

<wd l="9202" t="5381" r="10046" b="5525">Kaufmann</wd>

<space/>

<wd l="10109" t="5381" r="10483" b="5525">Pub-</wd>

</ln>

<ln l="6355" t="5611" r="7214" b="5755" baseLine="5750" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="5611" r="6864" b="5755">lishers</wd>

<space/>

<wd l="6926" t="5616" r="7214" b="5755">Inc.</wd>

</ln>

</para>

<para l="6120" t="5962" r="10483" b="6797" alignment="justified" li="216" spaceBefore="117" fli="-216" lsp="exactly" lspExact="230" language="en">

<ln l="6120" t="5962" r="10478" b="6144" baseLine="6101" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="5962" r="7008" b="6134">Kaufmann,</wd>

<space/>

<wd l="7075" t="5966" r="7344" b="6134">M.,</wd>

<space/>

<wd l="7416" t="5962" r="7560" b="6106">&amp;</wd>

<space/>

<wd l="7627" t="5962" r="8155" b="6134">Kalita,</wd>

<space/>

<wd l="8222" t="5966" r="8342" b="6106">J.</wd>

<space/>

<wd l="8414" t="5962" r="8981" b="6144">(2010).</wd>

<space/>

<wd l="9062" t="5962" r="9787" b="6144">Syntactic</wd>

<space/>

<wd l="9850" t="5962" r="10478" b="6106">normal-</wd>

</ln>

<ln l="6355" t="6192" r="10483" b="6374" baseLine="6331" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="6192" r="6893" b="6336">ization</wd>

<space/>

<wd l="6974" t="6192" r="7162" b="6336">of</wd>

<space/>

<wd l="7219" t="6192" r="7810" b="6336">Twitter</wd>

<space/>

<wd l="7886" t="6235" r="8674" b="6374">messages.</wd>

<space/>

<wd l="8770" t="6192" r="9782" b="6336">International</wd>

<space/>

<wd l="9869" t="6192" r="10483" b="6336">Confer-</wd>

</ln>

<ln l="6355" t="6422" r="10478" b="6605" baseLine="6557" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="6466" r="6710" b="6566">ence</wd>

<space/>

<wd l="6792" t="6466" r="6989" b="6566">on</wd>

<space/>

<wd l="7051" t="6422" r="7656" b="6566">Natural</wd>

<space/>

<wd l="7733" t="6427" r="8515" b="6605">Language</wd>

<space/>

<wd l="8592" t="6422" r="9499" b="6605">Processing,</wd>

<space/>

<wd l="9581" t="6422" r="10478" b="6605">Kharagpur,</wd>

<space/>

</ln>

<ln l="6355" t="6653" r="6797" b="6797" baseLine="6787" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="6653" r="6797" b="6797">India.</wd>

</ln>

</para>

<para l="6120" t="7003" r="10488" b="8333" alignment="justified" li="216" spaceBefore="123" fli="-216" lsp="exactly" lspExact="230" language="en">

<ln l="6120" t="7003" r="10474" b="7186" baseLine="7138" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="7003" r="7008" b="7186">Kernighan,</wd>

<space/>

<wd l="7109" t="7008" r="7325" b="7147">M.</wd>

<space/>

<wd l="7435" t="7008" r="7670" b="7176">D.,</wd>

<space/>

<wd l="7781" t="7003" r="8405" b="7176">Church,</wd>

<space/>

<wd l="8510" t="7008" r="8693" b="7147">K.</wd>

<space/>

<wd l="8798" t="7008" r="9082" b="7176">W.,</wd>

<space/>

<wd l="9192" t="7003" r="9336" b="7147">&amp;</wd>

<space/>

<wd l="9442" t="7003" r="9854" b="7176">Gale,</wd>

<space/>

<wd l="9960" t="7008" r="10186" b="7147">W.</wd>

<space/>

<wd l="10296" t="7003" r="10474" b="7147">A.</wd>

<space/>

</ln>

<ln l="6355" t="7234" r="10488" b="7416" baseLine="7368" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="7234" r="6922" b="7416">(1990).</wd>

<space/>

<wd l="6994" t="7234" r="7138" b="7373">A</wd>

<space/>

<wd l="7205" t="7234" r="7858" b="7416">Spelling</wd>

<space/>

<wd l="7930" t="7234" r="8779" b="7378">Correction</wd>

<space/>

<wd l="8842" t="7238" r="9533" b="7416">Program</wd>

<space/>

<wd l="9586" t="7234" r="10075" b="7378">Based</wd>

<space/>

<wd l="10142" t="7277" r="10339" b="7378">on</wd>

<space/>

<wd l="10402" t="7277" r="10488" b="7378">a</wd>

<space/>

</ln>

<ln l="6346" t="7464" r="10488" b="7646" baseLine="7598" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6346" t="7464" r="6830" b="7646">Noisy</wd>

<space/>

<wd l="6902" t="7464" r="7560" b="7608">Channel</wd>

<space/>

<wd l="7637" t="7464" r="8198" b="7608">Model.</wd>

<space/>

<wd l="8290" t="7469" r="8453" b="7603">In</wd>

<space/>

<wd l="8525" t="7464" r="9494" b="7646">Proceedings</wd>

<space/>

<wd l="9581" t="7464" r="9768" b="7608">of</wd>

<space/>

<wd l="9816" t="7464" r="10056" b="7608">the</wd>

<space/>

<wd l="10152" t="7464" r="10488" b="7608">13th</wd>

<space/>

</ln>

<ln l="6355" t="7690" r="10483" b="7872" baseLine="7829" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="7690" r="7267" b="7834">Conference</wd>

<space/>

<wd l="7373" t="7733" r="7574" b="7834">on</wd>

<space/>

<wd l="7675" t="7690" r="8861" b="7872">Computational</wd>

<space/>

<wd l="8962" t="7690" r="9845" b="7872">Linguistics</wd>

<space/>

<wd l="9960" t="7776" r="10018" b="7795">-</wd>

<space/>

<wd l="10118" t="7690" r="10483" b="7834">Vol-</wd>

</ln>

<ln l="6350" t="7920" r="10483" b="8102" baseLine="8059" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="7963" r="6686" b="8064">ume</wd>

<space/>

<wd l="6758" t="7920" r="6850" b="8059">2</wd>

<space/>

<wd l="6936" t="7920" r="7238" b="8102">(pp.</wd>

<space/>

<wd l="7320" t="7920" r="8126" b="8102">205–210).</wd>

<space/>

<wd l="8213" t="7920" r="9221" b="8102">Stroudsburg,</wd>

<space/>

<wd l="9298" t="7920" r="9595" b="8093">PA,</wd>

<space/>

<wd l="9672" t="7920" r="10114" b="8064">USA:</wd>

<space/>

<wd l="10200" t="7920" r="10483" b="8064">As-</wd>

</ln>

<ln l="6360" t="8150" r="9566" b="8333" baseLine="8290" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6360" t="8150" r="7070" b="8294">sociation</wd>

<space/>

<wd l="7123" t="8150" r="7358" b="8294">for</wd>

<space/>

<wd l="7406" t="8150" r="8587" b="8333">Computational</wd>

<space/>

<wd l="8645" t="8150" r="9566" b="8333">Linguistics.</wd>

</ln>

</para>

<para l="6120" t="8501" r="10493" b="9835" alignment="justified" li="216" spaceBefore="117" fli="-216" lsp="exactly" lspExact="230" language="en">

<ln l="6120" t="8501" r="10483" b="8683" baseLine="8640" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="8501" r="6686" b="8674">Kobus,</wd>

<space/>

<wd l="6758" t="8501" r="6979" b="8674">C.,</wd>

<space/>

<wd l="7051" t="8506" r="7536" b="8674">Yvon,</wd>

<space/>

<wd l="7608" t="8506" r="7814" b="8674">F.,</wd>

<space/>

<wd l="7891" t="8501" r="8035" b="8645">&amp;</wd>

<space/>

<wd l="8102" t="8501" r="8832" b="8674">Damnati,</wd>

<space/>

<wd l="8909" t="8501" r="9086" b="8645">G.</wd>

<space/>

<wd l="9163" t="8501" r="9734" b="8683">(2008).</wd>

<space/>

<wd l="9806" t="8501" r="10483" b="8645">Normal-</wd>

</ln>

<ln l="6355" t="8731" r="10478" b="8914" baseLine="8870" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="8731" r="6744" b="8914">izing</wd>

<space/>

<wd l="6821" t="8731" r="7253" b="8875">SMS:</wd>

<space/>

<wd l="7330" t="8731" r="7627" b="8875">Are</wd>

<space/>

<wd l="7694" t="8736" r="8054" b="8875">Two</wd>

<space/>

<wd l="8122" t="8731" r="8971" b="8914">Metaphors</wd>

<space/>

<wd l="9038" t="8736" r="9533" b="8875">Better</wd>

<space/>

<wd l="9595" t="8731" r="9998" b="8875">Than</wd>

<space/>

<wd l="10070" t="8731" r="10478" b="8875">One?</wd>

<space/>

</ln>

<ln l="6355" t="8962" r="10483" b="9144" baseLine="9101" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="8966" r="6518" b="9101">In</wd>

<space/>

<wd l="6600" t="8962" r="7570" b="9144">Proceedings</wd>

<space/>

<wd l="7661" t="8962" r="7848" b="9106">of</wd>

<space/>

<wd l="7906" t="8962" r="8146" b="9106">the</wd>

<space/>

<wd l="8232" t="8962" r="8678" b="9106">22Nd</wd>

<space/>

<wd l="8765" t="8962" r="9778" b="9106">International</wd>

<space/>

<wd l="9869" t="8962" r="10483" b="9106">Confer-</wd>

</ln>

<ln l="6355" t="9192" r="10478" b="9374" baseLine="9326" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="9235" r="6710" b="9336">ence</wd>

<space/>

<wd l="6773" t="9235" r="6970" b="9336">on</wd>

<space/>

<wd l="7022" t="9192" r="8203" b="9374">Computational</wd>

<space/>

<wd l="8261" t="9192" r="9144" b="9374">Linguistics</wd>

<space/>

<wd l="9206" t="9278" r="9264" b="9298">-</wd>

<space/>

<wd l="9322" t="9192" r="9960" b="9336">Volume</wd>

<space/>

<wd l="10037" t="9192" r="10094" b="9331">1</wd>

<space/>

<wd l="10176" t="9192" r="10478" b="9374">(pp.</wd>

<space/>

</ln>

<ln l="6350" t="9422" r="10493" b="9605" baseLine="9557" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="9422" r="7157" b="9605">441–448).</wd>

<space/>

<wd l="7248" t="9422" r="8256" b="9605">Stroudsburg,</wd>

<space/>

<wd l="8338" t="9422" r="8635" b="9595">PA,</wd>

<space/>

<wd l="8712" t="9422" r="9154" b="9566">USA:</wd>

<space/>

<wd l="9240" t="9422" r="10186" b="9566">Association</wd>

<space/>

<wd l="10258" t="9422" r="10493" b="9566">for</wd>

<space/>

</ln>

<ln l="6355" t="9653" r="8515" b="9835" baseLine="9787" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="9653" r="7531" b="9835">Computational</wd>

<space/>

<wd l="7589" t="9653" r="8515" b="9835">Linguistics.</wd>

</ln>

</para>

<para l="6120" t="10003" r="10488" b="11798" alignment="justified" li="216" spaceBefore="123" fli="-216" lsp="exactly" lspExact="230" language="en">

<ln l="6120" t="10003" r="10478" b="10186" baseLine="10138" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="10003" r="6696" b="10176">Koehn,</wd>

<space/>

<wd l="6773" t="10008" r="6979" b="10176">P.,</wd>

<space/>

<wd l="7061" t="10008" r="7632" b="10186">Hoang,</wd>

<space/>

<wd l="7714" t="10008" r="7949" b="10176">H.,</wd>

<space/>

<wd l="8030" t="10003" r="8515" b="10176">Birch,</wd>

<space/>

<wd l="8597" t="10003" r="8832" b="10176">A.,</wd>

<space/>

<wd l="8914" t="10003" r="10171" b="10176">Callison-Burch,</wd>

<space/>

<wd l="10258" t="10003" r="10478" b="10176">C.,</wd>

<space/>

</ln>

<ln l="6350" t="10234" r="10478" b="10416" baseLine="10368" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="10234" r="7094" b="10406">Federico,</wd>

<space/>

<wd l="7190" t="10238" r="7459" b="10406">M.,</wd>

<space/>

<wd l="7555" t="10234" r="8256" b="10406">Bertoldi,</wd>

<space/>

<wd l="8347" t="10238" r="8587" b="10406">N.,</wd>

<space/>

<wd l="8702" t="10354" r="8861" b="10378">...</wd>

<space/>

<wd l="8971" t="10234" r="9547" b="10406">Herbst,</wd>

<space/>

<wd l="9643" t="10238" r="9802" b="10378">E.</wd>

<space/>

<wd l="9907" t="10234" r="10478" b="10416">(2007).</wd>

<space/>

</ln>

<ln l="6350" t="10464" r="10483" b="10646" baseLine="10598" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="10469" r="6912" b="10608">Moses:</wd>

<space/>

<wd l="7037" t="10464" r="7464" b="10646">Open</wd>

<space/>

<wd l="7579" t="10464" r="8122" b="10608">Source</wd>

<space/>

<wd l="8232" t="10464" r="8822" b="10608">Toolkit</wd>

<space/>

<wd l="8928" t="10464" r="9163" b="10608">for</wd>

<space/>

<wd l="9274" t="10464" r="10046" b="10608">Statistical</wd>

<space/>

<wd l="10152" t="10469" r="10483" b="10608">Ma-</wd>

</ln>

<ln l="6355" t="10690" r="10483" b="10872" baseLine="10829" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="10690" r="6778" b="10834">chine</wd>

<space/>

<wd l="6859" t="10690" r="7805" b="10834">Translation.</wd>

<space/>

<wd l="7896" t="10694" r="8059" b="10829">In</wd>

<space/>

<wd l="8131" t="10690" r="9106" b="10872">Proceedings</wd>

<space/>

<wd l="9187" t="10690" r="9374" b="10834">of</wd>

<space/>

<wd l="9427" t="10690" r="9667" b="10834">the</wd>

<space/>

<wd l="9744" t="10690" r="10104" b="10834">45th</wd>

<space/>

<wd l="10176" t="10690" r="10483" b="10829">An-</wd>

</ln>

<ln l="6350" t="10920" r="10488" b="11102" baseLine="11059" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="10920" r="6686" b="11064">nual</wd>

<space/>

<wd l="6749" t="10920" r="7416" b="11102">Meeting</wd>

<space/>

<wd l="7483" t="10920" r="7670" b="11064">of</wd>

<space/>

<wd l="7704" t="10920" r="7944" b="11064">the</wd>

<space/>

<wd l="8006" t="10920" r="8405" b="11064">ACL</wd>

<space/>

<wd l="8472" t="10963" r="8669" b="11064">on</wd>

<space/>

<wd l="8731" t="10920" r="9576" b="11064">Interactive</wd>

<space/>

<wd l="9638" t="10925" r="10147" b="11064">Poster</wd>

<space/>

<wd l="10205" t="10920" r="10488" b="11064">and</wd>

<space/>

</ln>

<ln l="6350" t="11150" r="10483" b="11333" baseLine="11290" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="11150" r="7536" b="11294">Demonstration</wd>

<space/>

<wd l="7656" t="11150" r="8333" b="11294">Sessions</wd>

<space/>

<wd l="8453" t="11150" r="8755" b="11333">(pp.</wd>

<space/>

<wd l="8899" t="11150" r="9686" b="11333">177–180).</wd>

<space/>

<wd l="9821" t="11150" r="10483" b="11294">Strouds-</wd>

</ln>

<ln l="6346" t="11386" r="10483" b="11568" baseLine="11520" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6346" t="11386" r="6758" b="11568">burg,</wd>

<space/>

<wd l="6883" t="11386" r="7181" b="11558">PA,</wd>

<space/>

<wd l="7310" t="11386" r="7752" b="11530">USA:</wd>

<space/>

<wd l="7882" t="11386" r="8827" b="11530">Association</wd>

<space/>

<wd l="8952" t="11386" r="9182" b="11530">for</wd>

<space/>

<wd l="9302" t="11386" r="10483" b="11568">Computational</wd>

<space/>

</ln>

<ln l="6350" t="11616" r="7272" b="11798" baseLine="11750" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="11616" r="7272" b="11798">Linguistics.</wd>

</ln>

</para>

<para l="6120" t="11962" r="10483" b="13066" alignment="justified" li="216" spaceBefore="118" fli="-216" lsp="exactly" lspExact="230" language="en">

<ln l="6120" t="11962" r="10474" b="12144" baseLine="12096" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="11962" r="6442" b="12134">Lin,</wd>

<space/>

<wd l="6562" t="11966" r="6797" b="12134">H.,</wd>

<space/>

<wd l="6917" t="11962" r="7526" b="12134">Bilmes,</wd>

<space/>

<wd l="7646" t="11966" r="7819" b="12134">J.,</wd>

<space/>

<wd l="7939" t="11962" r="8602" b="12144">Vergyri,</wd>

<space/>

<wd l="8722" t="11966" r="8957" b="12134">D.,</wd>

<space/>

<wd l="9082" t="11962" r="9226" b="12106">&amp;</wd>

<space/>

<wd l="9346" t="11962" r="10171" b="12134">Kirchhoff,</wd>

<space/>

<wd l="10291" t="11966" r="10474" b="12106">K.</wd>

<space/>

</ln>

<ln l="6355" t="12192" r="10483" b="12374" baseLine="12326" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="12192" r="6922" b="12374">(2007).</wd>

<space/>

<wd l="7003" t="12192" r="7430" b="12336">OOV</wd>

<space/>

<wd l="7498" t="12192" r="8227" b="12336">detection</wd>

<space/>

<wd l="8280" t="12192" r="8491" b="12374">by</wd>

<space/>

<wd l="8530" t="12192" r="8918" b="12374">joint</wd>

<space/>

<wd l="8981" t="12192" r="9931" b="12374">word/phone</wd>

<space/>

<wd l="10003" t="12192" r="10483" b="12336">lattice</wd>

<space/>

</ln>

<ln l="6355" t="12422" r="10483" b="12605" baseLine="12557" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="12422" r="7186" b="12605">alignment.</wd>

<space/>

<wd l="7282" t="12427" r="7450" b="12562">In</wd>

<space/>

<wd l="7531" t="12422" r="8371" b="12566">Automatic</wd>

<space/>

<wd l="8462" t="12422" r="9034" b="12605">Speech</wd>

<space/>

<wd l="9115" t="12422" r="10094" b="12605">Recognition</wd>

<space/>

<wd l="10176" t="12427" r="10483" b="12566">Un-</wd>

</ln>

<ln l="6355" t="12653" r="10478" b="12835" baseLine="12787" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="12653" r="7325" b="12835">derstanding,</wd>

<space/>

<wd l="7387" t="12653" r="7829" b="12797">2007.</wd>

<space/>

<wd l="7896" t="12653" r="8462" b="12797">ASRU.</wd>

<space/>

<wd l="8539" t="12658" r="8966" b="12792">IEEE</wd>

<space/>

<wd l="9024" t="12653" r="9850" b="12835">Workshop</wd>

<space/>

<wd l="9917" t="12696" r="10114" b="12797">on</wd>

<space/>

<wd l="10176" t="12653" r="10478" b="12835">(pp.</wd>

<space/>

</ln>

<ln l="6350" t="12883" r="9960" b="13066" baseLine="13018" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="12883" r="7157" b="13066">478–483).</wd>

<space/>

<wd l="7224" t="12883" r="9960" b="13027">doi:10.1109/ASRU.2007.4430159</wd>

</ln>

</para>

<para l="6120" t="13234" r="10512" b="15024" alignment="justified" li="216" spaceBefore="123" fli="-216" lsp="exactly" lspExact="228" language="en">

<ln l="6120" t="13234" r="10483" b="13416" baseLine="13368" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="13234" r="6442" b="13406">Liu,</wd>

<space/>

<wd l="6509" t="13238" r="6710" b="13406">F.,</wd>

<space/>

<wd l="6778" t="13238" r="7296" b="13416">Weng,</wd>

<space/>

<wd l="7368" t="13238" r="7570" b="13406">F.,</wd>

<space/>

<wd l="7637" t="13238" r="8155" b="13416">Wang,</wd>

<space/>

<wd l="8222" t="13238" r="8453" b="13406">B.,</wd>

<space/>

<wd l="8525" t="13234" r="8669" b="13378">&amp;</wd>

<space/>

<wd l="8736" t="13234" r="9053" b="13406">Liu,</wd>

<space/>

<wd l="9120" t="13238" r="9302" b="13378">Y.</wd>

<space/>

<wd l="9379" t="13234" r="9950" b="13416">(2011).</wd>

<space/>

<wd l="10027" t="13238" r="10483" b="13378">Inser-</wd>

</ln>

<ln l="6350" t="13464" r="10493" b="13646" baseLine="13598" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="13464" r="6701" b="13637">tion,</wd>

<space/>

<wd l="6787" t="13464" r="7517" b="13637">Deletion,</wd>

<space/>

<wd l="7603" t="13507" r="7771" b="13608">or</wd>

<space/>

<wd l="7853" t="13464" r="8947" b="13608">Substitution?:</wd>

<space/>

<wd l="9029" t="13464" r="10042" b="13646">Normalizing</wd>

<space/>

<wd l="10128" t="13469" r="10493" b="13608">Text</wd>

<space/>

</ln>

<ln l="6350" t="13694" r="10483" b="13877" baseLine="13829" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="13699" r="7118" b="13877">Messages</wd>

<space/>

<wd l="7195" t="13694" r="7853" b="13838">Without</wd>

<space/>

<wd l="7925" t="13694" r="9394" b="13877">Pre-categorization</wd>

<space/>

<wd l="9466" t="13738" r="9734" b="13838">nor</wd>

<space/>

<wd l="9811" t="13694" r="10483" b="13877">Supervi-</wd>

</ln>

<ln l="6360" t="13925" r="10512" b="14107" baseLine="14059" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6360" t="13925" r="6720" b="14069">sion.</wd>

<space/>

<wd l="6787" t="13930" r="6950" b="14064">In</wd>

<space/>

<wd l="7003" t="13925" r="7973" b="14107">Proceedings</wd>

<space/>

<wd l="8035" t="13925" r="8227" b="14069">of</wd>

<space/>

<wd l="8251" t="13925" r="8496" b="14069">the</wd>

<space/>

<wd l="8549" t="13925" r="8909" b="14069">49th</wd>

<space/>

<wd l="8962" t="13925" r="9542" b="14069">Annual</wd>

<space/>

<wd l="9600" t="13925" r="10262" b="14107">Meeting</wd>

<space/>

<wd l="10325" t="13925" r="10512" b="14069">of</wd>

<space/>

</ln>

<ln l="6350" t="14155" r="10474" b="14338" baseLine="14290" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="14155" r="6590" b="14299">the</wd>

<space/>

<wd l="6739" t="14155" r="7685" b="14299">Association</wd>

<space/>

<wd l="7834" t="14155" r="8069" b="14299">for</wd>

<space/>

<wd l="8213" t="14155" r="9394" b="14338">Computational</wd>

<space/>

<wd l="9542" t="14155" r="10474" b="14338">Linguistics:</wd>

<space/>

</ln>

<ln l="6350" t="14386" r="10488" b="14568" baseLine="14520" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="14390" r="6936" b="14530">Human</wd>

<space/>

<wd l="7056" t="14390" r="7838" b="14568">Language</wd>

<space/>

<wd l="7968" t="14386" r="9082" b="14568">Technologies:</wd>

<space/>

<wd l="9226" t="14386" r="9653" b="14530">Short</wd>

<space/>

<wd l="9768" t="14390" r="10301" b="14568">Papers</wd>

<space/>

<wd l="10430" t="14472" r="10488" b="14491">-</wd>

<space/>

</ln>

<ln l="6350" t="14616" r="10483" b="14798" baseLine="14750" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="14616" r="6989" b="14760">Volume</wd>

<space/>

<wd l="7046" t="14616" r="7138" b="14755">2</wd>

<space/>

<wd l="7205" t="14616" r="7507" b="14798">(pp.</wd>

<space/>

<wd l="7579" t="14616" r="8179" b="14798">71–76).</wd>

<space/>

<wd l="8256" t="14616" r="9264" b="14798">Stroudsburg,</wd>

<space/>

<wd l="9326" t="14616" r="9624" b="14789">PA,</wd>

<space/>

<wd l="9686" t="14616" r="10128" b="14760">USA:</wd>

<space/>

<wd l="10200" t="14616" r="10483" b="14760">As-</wd>

</ln>

<ln l="6360" t="14842" r="9566" b="15024" baseLine="14981" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6360" t="14842" r="7070" b="14986">sociation</wd>

<space/>

<wd l="7123" t="14842" r="7358" b="14986">for</wd>

<space/>

<wd l="7406" t="14842" r="8587" b="15024">Computational</wd>

<space/>

<wd l="8645" t="14842" r="9566" b="15024">Linguistics.</wd>

</ln>

</para>

</column>

</section>

<dd l="1294" t="15736" r="10520" b="15977">

<para l="5804" t="15787" r="6148" b="15946" alignment="centered" spaceBefore="4" lsp="exactly" lspExact="229" language="en">

<ln l="5870" t="15787" r="6082" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="41">

<wd l="5870" t="15787" r="6082" b="15946">26</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4303.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1294" marginTop="1416" marginRight="1389" marginBottom="1302" offsetX="-6" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1294" t="1416" r="10520" b="15387">

<column l="1294" t="1416" r="5897" b="15387">

<para l="1416" t="1459" r="5779" b="2558" alignment="justified" li="360" ri="72" fli="-288" lsp="exactly" lspExact="230" language="en">

<ln l="1416" t="1459" r="5774" b="1642" baseLine="1594" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="1464" r="1915" b="1642">Lopez</wd>

<space/>

<wd l="1982" t="1459" r="2626" b="1632">Ludeña,</wd>

<space/>

<wd l="2702" t="1464" r="2938" b="1632">V.,</wd>

<space/>

<wd l="3024" t="1459" r="3317" b="1603">San</wd>

<space/>

<wd l="3389" t="1459" r="4128" b="1642">Segundo,</wd>

<space/>

<wd l="4200" t="1464" r="4426" b="1632">R.,</wd>

<space/>

<wd l="4502" t="1464" r="5232" b="1632">Montero,</wd>

<space/>

<wd l="5309" t="1464" r="5424" b="1603">J.</wd>

<space/>

<wd l="5506" t="1464" r="5774" b="1632">M.,</wd>

<space/>

</ln>

<ln l="1646" t="1690" r="5779" b="1872" baseLine="1824" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="1694" r="2093" b="1834">Barra</wd>

<space/>

<wd l="2160" t="1690" r="2818" b="1862">Chicote,</wd>

<space/>

<wd l="2890" t="1694" r="3115" b="1862">R.,</wd>

<space/>

<wd l="3192" t="1690" r="3336" b="1834">&amp;</wd>

<space/>

<wd l="3408" t="1694" r="4118" b="1862">Lorenzo,</wd>

<space/>

<wd l="4190" t="1694" r="4306" b="1834">J.</wd>

<space/>

<wd l="4387" t="1690" r="4954" b="1872">(2012).</wd>

<space/>

<wd l="5030" t="1690" r="5779" b="1834">Architec-</wd>

</ln>

<ln l="1646" t="1915" r="5779" b="2098" baseLine="2054" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="1934" r="1954" b="2059">ture</wd>

<space/>

<wd l="2045" t="1915" r="2280" b="2059">for</wd>

<space/>

<wd l="2366" t="1920" r="2731" b="2059">Text</wd>

<space/>

<wd l="2808" t="1915" r="3970" b="2059">Normalization</wd>

<space/>

<wd l="4056" t="1915" r="4483" b="2098">using</wd>

<space/>

<wd l="4584" t="1915" r="5357" b="2059">Statistical</wd>

<space/>

<wd l="5448" t="1920" r="5779" b="2059">Ma-</wd>

</ln>

<ln l="1651" t="2146" r="5774" b="2328" baseLine="2285" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="2146" r="2074" b="2290">chine</wd>

<space/>

<wd l="2131" t="2146" r="3038" b="2290">Translation</wd>

<space/>

<wd l="3091" t="2146" r="3984" b="2328">techniques.</wd>

<space/>

<wd l="4051" t="2150" r="4214" b="2285">In</wd>

<space/>

<wd l="4267" t="2146" r="5333" b="2290">IberSPEECH</wd>

<space/>

<wd l="5381" t="2146" r="5774" b="2290">2012</wd>

<space/>

</ln>

<ln l="1651" t="2376" r="4853" b="2558" baseLine="2515" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="2376" r="1954" b="2558">(pp.</wd>

<space/>

<wd l="2030" t="2376" r="2818" b="2558">112–122).</wd>

<space/>

<wd l="2880" t="2376" r="3514" b="2549">Madrid,</wd>

<space/>

<wd l="3581" t="2376" r="4066" b="2558">Spain:</wd>

<space/>

<wd l="4138" t="2376" r="4853" b="2558">Springer.</wd>

</ln>

</para>

<para l="1416" t="2726" r="5779" b="3331" alignment="justified" li="360" ri="72" spaceBefore="115" fli="-288" lsp="exactly" lspExact="232" language="en">

<ln l="1416" t="2726" r="5779" b="2909" baseLine="2866" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="2726" r="2179" b="2909">Manning,</wd>

<space/>

<wd l="2246" t="2726" r="2414" b="2870">C.</wd>

<space/>

<wd l="2477" t="2731" r="2717" b="2899">D.,</wd>

<space/>

<wd l="2784" t="2726" r="2928" b="2870">&amp;</wd>

<space/>

<wd l="2986" t="2726" r="3826" b="2909">Raghavan,</wd>

<space/>

<wd l="3888" t="2731" r="4042" b="2870">P.</wd>

<space/>

<wd l="4109" t="2726" r="4680" b="2909">(2009).</wd>

<space/>

<wd l="4742" t="2726" r="4987" b="2866">An</wd>

<space/>

<wd l="5040" t="2726" r="5779" b="2870">Introduc-</wd>

</ln>

<ln l="1646" t="2957" r="5770" b="3101" baseLine="3096" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="2957" r="1958" b="3101">tion</wd>

<space/>

<wd l="2290" t="2976" r="2443" b="3101">to</wd>

<space/>

<wd l="2789" t="2957" r="3734" b="3101">Information</wd>

<space/>

<wd l="4070" t="2957" r="4838" b="3101">Retrieval.</wd>

<space/>

<wd l="5194" t="2957" r="5770" b="3101">Online.</wd>

<space/>

</ln>

<ln l="1651" t="3187" r="4214" b="3331" baseLine="3326" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="3187" r="4214" b="3331">doi:10.1109/LPT.2009.2020494</wd>

</ln>

</para>

<para l="1416" t="3538" r="5784" b="4642" alignment="justified" li="360" ri="72" spaceBefore="108" fli="-288" lsp="exactly" lspExact="232" language="en">

<ln l="1416" t="3538" r="5770" b="3720" baseLine="3672" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="3538" r="2203" b="3710">Marneffe,</wd>

<space/>

<wd l="2275" t="3538" r="2741" b="3682">M.-C.</wd>

<space/>

<wd l="2827" t="3538" r="3053" b="3710">de,</wd>

<space/>

<wd l="3130" t="3538" r="4157" b="3720">MacCartney,</wd>

<space/>

<wd l="4234" t="3542" r="4459" b="3710">B.,</wd>

<space/>

<wd l="4541" t="3538" r="4685" b="3682">&amp;</wd>

<space/>

<wd l="4757" t="3538" r="5520" b="3720">Manning,</wd>

<space/>

<wd l="5602" t="3538" r="5770" b="3682">C.</wd>

<space/>

</ln>

<ln l="1646" t="3768" r="5779" b="3950" baseLine="3902" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="3773" r="1829" b="3912">D.</wd>

<space/>

<wd l="1958" t="3768" r="2530" b="3950">(2006).</wd>

<space/>

<wd l="2659" t="3768" r="3538" b="3950">Generating</wd>

<space/>

<wd l="3653" t="3768" r="4099" b="3950">typed</wd>

<space/>

<wd l="4219" t="3768" r="5174" b="3950">dependency</wd>

<space/>

<wd l="5280" t="3811" r="5779" b="3950">parses</wd>

<space/>

</ln>

<ln l="1651" t="3998" r="5779" b="4181" baseLine="4133" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="3998" r="2040" b="4142">from</wd>

<space/>

<wd l="2107" t="3998" r="2630" b="4181">phrase</wd>

<space/>

<wd l="2722" t="4018" r="3413" b="4142">structure</wd>

<space/>

<wd l="3490" t="4042" r="4032" b="4181">parses.</wd>

<space/>

<wd l="4133" t="4003" r="4296" b="4138">In</wd>

<space/>

<wd l="4378" t="3998" r="4680" b="4142">The</wd>

<space/>

<wd l="4766" t="3998" r="5779" b="4142">International</wd>

<space/>

</ln>

<ln l="1651" t="4229" r="5784" b="4411" baseLine="4363" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="4229" r="2563" b="4373">Conference</wd>

<space/>

<wd l="2621" t="4272" r="2818" b="4373">on</wd>

<space/>

<wd l="2866" t="4234" r="3653" b="4411">Language</wd>

<space/>

<wd l="3706" t="4234" r="4522" b="4373">Resources</wd>

<space/>

<wd l="4584" t="4229" r="4867" b="4373">and</wd>

<space/>

<wd l="4915" t="4229" r="5784" b="4373">Evaluation</wd>

<space/>

</ln>

<ln l="1651" t="4459" r="4699" b="4642" baseLine="4594" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="4459" r="2285" b="4642">(LREC)</wd>

<space/>

<wd l="2342" t="4459" r="2645" b="4642">(pp.</wd>

<space/>

<wd l="2707" t="4459" r="3514" b="4642">449–454).</wd>

<space/>

<wd l="3581" t="4459" r="4238" b="4632">Genova,</wd>

<space/>

<wd l="4301" t="4459" r="4699" b="4642">Italy.</wd>

</ln>

</para>

<para l="1416" t="4810" r="5784" b="5448" alignment="justified" li="360" ri="72" spaceBefore="115" fli="-288" lsp="exactly" lspExact="232" language="en">

<ln l="1416" t="4810" r="5774" b="4992" baseLine="4944" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="4814" r="1901" b="4992">Mays,</wd>

<space/>

<wd l="1997" t="4814" r="2213" b="4982">E.,</wd>

<space/>

<wd l="2309" t="4814" r="3086" b="4982">Damerau,</wd>

<space/>

<wd l="3182" t="4814" r="3331" b="4954">F.</wd>

<space/>

<wd l="3432" t="4814" r="3605" b="4982">J.,</wd>

<space/>

<wd l="3706" t="4810" r="3850" b="4954">&amp;</wd>

<space/>

<wd l="3946" t="4814" r="4570" b="4982">Mercer,</wd>

<space/>

<wd l="4666" t="4814" r="4834" b="4954">R.</wd>

<space/>

<wd l="4939" t="4814" r="5098" b="4954">L.</wd>

<space/>

<wd l="5203" t="4810" r="5774" b="4992">(1991).</wd>

<space/>

</ln>

<ln l="1651" t="5035" r="5784" b="5218" baseLine="5174" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="5035" r="2280" b="5179">Context</wd>

<space/>

<wd l="2424" t="5035" r="2885" b="5179">based</wd>

<space/>

<wd l="3043" t="5035" r="3662" b="5218">spelling</wd>

<space/>

<wd l="3821" t="5035" r="4666" b="5179">correction.</wd>

<space/>

<wd l="4834" t="5035" r="5784" b="5179">Information</wd>

<space/>

</ln>

<ln l="1646" t="5266" r="5184" b="5448" baseLine="5405" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="5266" r="2506" b="5448">Processing</wd>

<space/>

<wd l="2568" t="5266" r="2712" b="5410">&amp;</wd>

<space/>

<wd l="2765" t="5270" r="3854" b="5448">Management,</wd>

<space/>

<wd l="3912" t="5266" r="4392" b="5448">27(5),</wd>

<space/>

<wd l="4450" t="5266" r="5184" b="5410">517–522.</wd>

</ln>

</para>

<para l="1411" t="5616" r="5789" b="6720" alignment="justified" li="360" ri="72" spaceBefore="107" fli="-288" lsp="exactly" lspExact="232" language="en">

<ln l="1411" t="5616" r="5770" b="5798" baseLine="5755" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="5616" r="1992" b="5789">Novak,</wd>

<space/>

<wd l="2093" t="5621" r="2266" b="5789">J.,</wd>

<space/>

<wd l="2366" t="5621" r="2842" b="5798">Yang,</wd>

<space/>

<wd l="2942" t="5621" r="3178" b="5789">D.,</wd>

<space/>

<wd l="3278" t="5616" r="4219" b="5789">Minematsu,</wd>

<space/>

<wd l="4315" t="5621" r="4560" b="5789">N.,</wd>

<space/>

<wd l="4666" t="5616" r="4810" b="5760">&amp;</wd>

<space/>

<wd l="4910" t="5616" r="5486" b="5789">Hirose,</wd>

<space/>

<wd l="5587" t="5621" r="5770" b="5760">K.</wd>

<space/>

</ln>

<ln l="1651" t="5851" r="5770" b="6034" baseLine="5986" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="5851" r="2218" b="6034">(2011).</wd>

<space/>

<wd l="2299" t="5851" r="3461" b="5995">Phonetisaurus:</wd>

<space/>

<wd l="3542" t="5851" r="3686" b="5990">A</wd>

<space/>

<wd l="3758" t="5851" r="4680" b="5995">wfst-driven</wd>

<space/>

<wd l="4742" t="5851" r="5770" b="6034">phoneticizer.</wd>

<space/>

</ln>

<ln l="1651" t="6082" r="5779" b="6264" baseLine="6216" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="6082" r="1954" b="6226">The</wd>

<space/>

<wd l="2021" t="6082" r="2866" b="6264">University</wd>

<space/>

<wd l="2928" t="6082" r="3115" b="6226">of</wd>

<space/>

<wd l="3154" t="6082" r="3715" b="6264">Tokyo,</wd>

<space/>

<wd l="3792" t="6082" r="4301" b="6264">Tokyo</wd>

<space/>

<wd l="4373" t="6082" r="5021" b="6226">Institute</wd>

<space/>

<wd l="5093" t="6082" r="5280" b="6226">of</wd>

<space/>

<wd l="5318" t="6082" r="5779" b="6226">Tech-</wd>

</ln>

<ln l="1646" t="6312" r="5789" b="6494" baseLine="6446" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="6312" r="2237" b="6494">nology.</wd>

<space/>

<wd l="2477" t="6312" r="3254" b="6456">Retrieved</wd>

<space/>

<wd l="3485" t="6317" r="4109" b="6494">January</wd>

<space/>

<wd l="4354" t="6312" r="4478" b="6485">1,</wd>

<space/>

<wd l="4718" t="6312" r="5160" b="6485">2014,</wd>

<space/>

<wd l="5405" t="6312" r="5789" b="6456">from</wd>

<space/>

</ln>

<ln l="1646" t="6538" r="4867" b="6720" baseLine="6672" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="6538" r="4867" b="6720">http://code.google.com/p/phonetisaurus/</wd>

</ln>

</para>

<para l="1421" t="6888" r="5779" b="8222" alignment="justified" li="360" ri="72" spaceBefore="110" fli="-288" lsp="exactly" lspExact="232" language="en">

<ln l="1421" t="6888" r="5779" b="7070" baseLine="7022" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="6888" r="1790" b="7061">Och,</wd>

<space/>

<wd l="1858" t="6893" r="2006" b="7032">F.</wd>

<space/>

<wd l="2074" t="6893" r="2246" b="7061">J.,</wd>

<space/>

<wd l="2314" t="6888" r="2458" b="7032">&amp;</wd>

<space/>

<wd l="2515" t="6893" r="2894" b="7070">Ney,</wd>

<space/>

<wd l="2957" t="6893" r="3139" b="7032">H.</wd>

<space/>

<wd l="3216" t="6888" r="3782" b="7070">(2002).</wd>

<space/>

<wd l="3850" t="6888" r="5030" b="7032">Discriminative</wd>

<space/>

<wd l="5098" t="6888" r="5779" b="7070">Training</wd>

<space/>

</ln>

<ln l="1651" t="7118" r="5779" b="7301" baseLine="7253" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="7118" r="1934" b="7262">and</wd>

<space/>

<wd l="2002" t="7118" r="2837" b="7262">Maximum</wd>

<space/>

<wd l="2904" t="7123" r="3552" b="7301">Entropy</wd>

<space/>

<wd l="3619" t="7118" r="4214" b="7262">Models</wd>

<space/>

<wd l="4291" t="7118" r="4526" b="7262">for</wd>

<space/>

<wd l="4598" t="7118" r="5376" b="7262">Statistical</wd>

<space/>

<wd l="5448" t="7123" r="5779" b="7262">Ma-</wd>

</ln>

<ln l="1651" t="7349" r="5779" b="7531" baseLine="7483" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="7349" r="2074" b="7493">chine</wd>

<space/>

<wd l="2155" t="7349" r="3101" b="7493">Translation.</wd>

<space/>

<wd l="3192" t="7354" r="3355" b="7488">In</wd>

<space/>

<wd l="3427" t="7349" r="4402" b="7531">Proceedings</wd>

<space/>

<wd l="4483" t="7349" r="4670" b="7493">of</wd>

<space/>

<wd l="4723" t="7349" r="4963" b="7493">the</wd>

<space/>

<wd l="5040" t="7349" r="5400" b="7493">40th</wd>

<space/>

<wd l="5472" t="7349" r="5779" b="7488">An-</wd>

</ln>

<ln l="1646" t="7579" r="5779" b="7762" baseLine="7714" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="7579" r="1982" b="7723">nual</wd>

<space/>

<wd l="2098" t="7579" r="2765" b="7762">Meeting</wd>

<space/>

<wd l="2885" t="7622" r="3082" b="7723">on</wd>

<space/>

<wd l="3192" t="7579" r="4138" b="7723">Association</wd>

<space/>

<wd l="4253" t="7579" r="4488" b="7723">for</wd>

<space/>

<wd l="4598" t="7579" r="5779" b="7762">Computational</wd>

<space/>

</ln>

<ln l="1646" t="7810" r="5770" b="7992" baseLine="7944" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="7810" r="2525" b="7992">Linguistics</wd>

<space/>

<wd l="2602" t="7810" r="2904" b="7992">(pp.</wd>

<space/>

<wd l="2981" t="7810" r="3787" b="7992">295–302).</wd>

<space/>

<wd l="3874" t="7810" r="4882" b="7992">Stroudsburg,</wd>

<space/>

<wd l="4954" t="7810" r="5256" b="7982">PA,</wd>

<space/>

<wd l="5328" t="7810" r="5770" b="7954">USA:</wd>

<space/>

</ln>

<ln l="1646" t="8040" r="5088" b="8222" baseLine="8174" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="8040" r="2592" b="8184">Association</wd>

<space/>

<wd l="2645" t="8040" r="2875" b="8184">for</wd>

<space/>

<wd l="2928" t="8040" r="4109" b="8222">Computational</wd>

<space/>

<wd l="4162" t="8040" r="5088" b="8222">Linguistics.</wd>

</ln>

</para>

<para l="1421" t="8390" r="5779" b="9034" alignment="justified" li="360" ri="72" spaceBefore="116" fli="-288" lsp="exactly" lspExact="232" language="en">

<ln l="1421" t="8390" r="5779" b="8573" baseLine="8525" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="8390" r="1790" b="8563">Och,</wd>

<space/>

<wd l="1858" t="8395" r="2006" b="8534">F.</wd>

<space/>

<wd l="2078" t="8395" r="2251" b="8563">J.,</wd>

<space/>

<wd l="2323" t="8390" r="2467" b="8534">&amp;</wd>

<space/>

<wd l="2530" t="8395" r="2909" b="8573">Ney,</wd>

<space/>

<wd l="2976" t="8395" r="3158" b="8534">H.</wd>

<space/>

<wd l="3235" t="8390" r="3806" b="8573">(2003).</wd>

<space/>

<wd l="3874" t="8390" r="4018" b="8530">A</wd>

<space/>

<wd l="4085" t="8390" r="4954" b="8573">Systematic</wd>

<space/>

<wd l="5021" t="8390" r="5779" b="8573">Compari-</wd>

</ln>

<ln l="1656" t="8621" r="5770" b="8803" baseLine="8755" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="8664" r="1925" b="8765">son</wd>

<space/>

<wd l="2078" t="8621" r="2266" b="8765">of</wd>

<space/>

<wd l="2395" t="8621" r="3024" b="8765">Various</wd>

<space/>

<wd l="3192" t="8621" r="3965" b="8765">Statistical</wd>

<space/>

<wd l="4123" t="8621" r="4978" b="8803">Alignment</wd>

<space/>

<wd l="5131" t="8621" r="5770" b="8765">Models.</wd>

<space/>

</ln>

<ln l="1651" t="8851" r="4282" b="9034" baseLine="8986" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="8851" r="2328" b="9034">Comput.</wd>

<space/>

<wd l="2390" t="8851" r="3149" b="9034">Linguist.,</wd>

<space/>

<wd l="3206" t="8851" r="3686" b="9034">29(1),</wd>

<space/>

<wd l="3758" t="8851" r="4282" b="8995">19–51.</wd>

</ln>

</para>

<para l="1421" t="9202" r="5779" b="10070" alignment="justified" li="360" ri="72" spaceBefore="113" fli="-288" lsp="exactly" lspExact="232" language="en">

<ln l="1421" t="9202" r="5774" b="9384" baseLine="9336" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="9202" r="1901" b="9374">Oliva,</wd>

<space/>

<wd l="1963" t="9206" r="2136" b="9374">J.,</wd>

<space/>

<wd l="2203" t="9202" r="2861" b="9374">Serrano,</wd>

<space/>

<wd l="2918" t="9206" r="3038" b="9346">J.</wd>

<space/>

<wd l="3106" t="9206" r="3264" b="9374">I.,</wd>

<space/>

<wd l="3322" t="9202" r="3610" b="9346">Del</wd>

<space/>

<wd l="3667" t="9202" r="4330" b="9374">Castillo,</wd>

<space/>

<wd l="4387" t="9206" r="4603" b="9346">M.</wd>

<space/>

<wd l="4670" t="9206" r="4906" b="9374">D.,</wd>

<space/>

<wd l="4973" t="9202" r="5117" b="9346">&amp;</wd>

<space/>

<wd l="5179" t="9202" r="5774" b="9384">Iglesias,</wd>

<space/>

</ln>

<ln l="1646" t="9389" r="5779" b="9610" baseLine="9566" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="9389" r="1824" b="9571">Á.</wd>

<space/>

<wd l="1906" t="9427" r="2477" b="9610">(2013).</wd>

<space/>

<wd l="2554" t="9427" r="2698" b="9566">A</wd>

<space/>

<wd l="2770" t="9427" r="3149" b="9571">SMS</wd>

<space/>

<wd l="3216" t="9427" r="4378" b="9571">Normalization</wd>

<space/>

<wd l="4450" t="9427" r="5035" b="9610">System</wd>

<space/>

<wd l="5098" t="9432" r="5779" b="9610">Integrat-</wd>

</ln>

<ln l="1651" t="9658" r="5779" b="9840" baseLine="9797" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="9658" r="1896" b="9840">ing</wd>

<space/>

<wd l="1958" t="9658" r="2645" b="9840">Multiple</wd>

<space/>

<wd l="2712" t="9658" r="3749" b="9802">Grammatical</wd>

<space/>

<wd l="3811" t="9662" r="4670" b="9802">Resources.</wd>

<space/>

<wd l="4738" t="9658" r="5342" b="9802">Natural</wd>

<space/>

<wd l="5405" t="9662" r="5779" b="9802">Lan-</wd>

</ln>

<ln l="1651" t="9888" r="4622" b="10070" baseLine="10027" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="9931" r="2117" b="10070">guage</wd>

<space/>

<wd l="2170" t="9888" r="3192" b="10070">Engineering,</wd>

<space/>

<wd l="3269" t="9888" r="3830" b="10070">19(01),</wd>

<space/>

<wd l="3902" t="9888" r="4622" b="10032">121–141.</wd>

</ln>

</para>

<para l="1416" t="10238" r="5784" b="11573" alignment="justified" li="360" ri="72" spaceBefore="106" fli="-288" lsp="exactly" lspExact="232" language="en">

<ln l="1416" t="10238" r="5779" b="10421" baseLine="10378" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="10238" r="2160" b="10421">Papineni,</wd>

<space/>

<wd l="2242" t="10243" r="2482" b="10411">K.,</wd>

<space/>

<wd l="2563" t="10238" r="3216" b="10411">Roukos,</wd>

<space/>

<wd l="3307" t="10238" r="3504" b="10411">S.,</wd>

<space/>

<wd l="3590" t="10238" r="3734" b="10382">&amp;</wd>

<space/>

<wd l="3816" t="10238" r="4306" b="10411">Ward,</wd>

<space/>

<wd l="4392" t="10243" r="4550" b="10382">T.</wd>

<space/>

<wd l="4642" t="10238" r="5208" b="10421">(1998).</wd>

<space/>

<wd l="5294" t="10238" r="5779" b="10382">Maxi-</wd>

</ln>

<ln l="1646" t="10469" r="5779" b="10651" baseLine="10608" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="10512" r="2059" b="10613">mum</wd>

<space/>

<wd l="2136" t="10469" r="2947" b="10613">likelihood</wd>

<space/>

<wd l="3029" t="10469" r="3312" b="10613">and</wd>

<space/>

<wd l="3398" t="10469" r="4531" b="10613">discriminative</wd>

<space/>

<wd l="4613" t="10469" r="5232" b="10651">training</wd>

<space/>

<wd l="5318" t="10469" r="5506" b="10613">of</wd>

<space/>

<wd l="5563" t="10469" r="5779" b="10613">di-</wd>

</ln>

<ln l="1646" t="10699" r="5784" b="10882" baseLine="10838" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="10718" r="1949" b="10843">rect</wd>

<space/>

<wd l="2030" t="10699" r="2870" b="10843">translation</wd>

<space/>

<wd l="2957" t="10699" r="3566" b="10843">models.</wd>

<space/>

<wd l="3667" t="10704" r="3830" b="10838">In</wd>

<space/>

<wd l="3912" t="10699" r="4742" b="10872">Acoustics,</wd>

<space/>

<wd l="4843" t="10699" r="5414" b="10882">Speech</wd>

<space/>

<wd l="5501" t="10699" r="5784" b="10843">and</wd>

<space/>

</ln>

<ln l="1656" t="10930" r="5774" b="11112" baseLine="11064" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="10930" r="2150" b="11112">Signal</wd>

<space/>

<wd l="2232" t="10930" r="3144" b="11112">Processing,</wd>

<space/>

<wd l="3250" t="10930" r="3672" b="11074">1998.</wd>

<space/>

<wd l="3758" t="10930" r="4733" b="11112">Proceedings</wd>

<space/>

<wd l="4819" t="10930" r="5006" b="11074">of</wd>

<space/>

<wd l="5059" t="10930" r="5299" b="11074">the</wd>

<space/>

<wd l="5400" t="10930" r="5774" b="11074">1998</wd>

<space/>

</ln>

<ln l="1651" t="11160" r="5774" b="11342" baseLine="11294" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="11165" r="2078" b="11299">IEEE</wd>

<space/>

<wd l="2208" t="11160" r="3221" b="11304">International</wd>

<space/>

<wd l="3350" t="11160" r="4267" b="11304">Conference</wd>

<space/>

<wd l="4402" t="11203" r="4598" b="11304">on</wd>

<space/>

<wd l="4723" t="11160" r="5126" b="11342">(Vol.</wd>

<space/>

<wd l="5280" t="11160" r="5405" b="11333">1,</wd>

<space/>

<wd l="5530" t="11203" r="5774" b="11342">pp.</wd>

<space/>

</ln>

<ln l="1666" t="11390" r="2909" b="11573" baseLine="11525" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1666" t="11390" r="2342" b="11534">189–192</wd>

<space/>

<wd l="2400" t="11390" r="2909" b="11573">vol.1).</wd>

</ln>

</para>

<para l="1416" t="11741" r="5779" b="13070" alignment="justified" li="360" ri="72" spaceBefore="110" fli="-288" lsp="exactly" lspExact="232" language="en">

<ln l="1416" t="11741" r="5770" b="11923" baseLine="11875" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="11741" r="2160" b="11923">Papineni,</wd>

<space/>

<wd l="2270" t="11746" r="2506" b="11914">K.,</wd>

<space/>

<wd l="2616" t="11741" r="3269" b="11914">Roukos,</wd>

<space/>

<wd l="3389" t="11741" r="3586" b="11914">S.,</wd>

<space/>

<wd l="3696" t="11741" r="4186" b="11914">Ward,</wd>

<space/>

<wd l="4296" t="11746" r="4507" b="11914">T.,</wd>

<space/>

<wd l="4622" t="11741" r="4766" b="11885">&amp;</wd>

<space/>

<wd l="4877" t="11741" r="5237" b="11914">Zhu,</wd>

<space/>

<wd l="5347" t="11746" r="5770" b="11885">W.-J.</wd>

<space/>

</ln>

<ln l="1651" t="11971" r="5779" b="12154" baseLine="12106" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="11971" r="2218" b="12154">(2002).</wd>

<space/>

<wd l="2314" t="11976" r="2880" b="12115">BLEU:</wd>

<space/>

<wd l="2976" t="11971" r="3120" b="12110">A</wd>

<space/>

<wd l="3206" t="11971" r="3826" b="12115">Method</wd>

<space/>

<wd l="3917" t="11971" r="4152" b="12115">for</wd>

<space/>

<wd l="4234" t="11971" r="5074" b="12115">Automatic</wd>

<space/>

<wd l="5160" t="11971" r="5779" b="12115">Evalua-</wd>

</ln>

<ln l="1646" t="12197" r="5779" b="12379" baseLine="12336" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="12197" r="1958" b="12341">tion</wd>

<space/>

<wd l="2026" t="12197" r="2213" b="12341">of</wd>

<space/>

<wd l="2251" t="12197" r="2947" b="12341">Machine</wd>

<space/>

<wd l="3019" t="12197" r="3960" b="12341">Translation.</wd>

<space/>

<wd l="4042" t="12202" r="4205" b="12336">In</wd>

<space/>

<wd l="4267" t="12197" r="5237" b="12379">Proceedings</wd>

<space/>

<wd l="5314" t="12197" r="5501" b="12341">of</wd>

<space/>

<wd l="5539" t="12197" r="5779" b="12341">the</wd>

<space/>

</ln>

<ln l="1646" t="12427" r="5779" b="12610" baseLine="12566" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="12427" r="2002" b="12571">40th</wd>

<space/>

<wd l="2059" t="12427" r="2645" b="12571">Annual</wd>

<space/>

<wd l="2702" t="12427" r="3370" b="12610">Meeting</wd>

<space/>

<wd l="3432" t="12470" r="3629" b="12571">on</wd>

<space/>

<wd l="3691" t="12427" r="4632" b="12571">Association</wd>

<space/>

<wd l="4694" t="12427" r="4930" b="12571">for</wd>

<space/>

<wd l="4987" t="12427" r="5779" b="12610">Computa-</wd>

</ln>

<ln l="1646" t="12658" r="5774" b="12840" baseLine="12797" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="12658" r="2098" b="12802">tional</wd>

<space/>

<wd l="2165" t="12658" r="3048" b="12840">Linguistics</wd>

<space/>

<wd l="3125" t="12658" r="3427" b="12840">(pp.</wd>

<space/>

<wd l="3509" t="12658" r="4310" b="12840">311–318).</wd>

<space/>

<wd l="4397" t="12658" r="5405" b="12840">Stroudsburg,</wd>

<space/>

<wd l="5477" t="12658" r="5774" b="12830">PA,</wd>

<space/>

</ln>

<ln l="1646" t="12888" r="5592" b="13070" baseLine="13027" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="12888" r="2088" b="13032">USA:</wd>

<space/>

<wd l="2150" t="12888" r="3096" b="13032">Association</wd>

<space/>

<wd l="3149" t="12888" r="3384" b="13032">for</wd>

<space/>

<wd l="3432" t="12888" r="4613" b="13070">Computational</wd>

<space/>

<wd l="4670" t="12888" r="5592" b="13070">Linguistics.</wd>

</ln>

</para>

<para l="1416" t="13238" r="5808" b="14074" alignment="justified" li="360" ri="72" spaceBefore="109" fli="-288" lsp="exactly" lspExact="232" language="en">

<ln l="1416" t="13238" r="5808" b="13421" baseLine="13378" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="13238" r="2059" b="13411">Pennell,</wd>

<space/>

<wd l="2155" t="13243" r="2338" b="13382">D.</wd>

<space/>

<wd l="2438" t="13243" r="2654" b="13411">L.,</wd>

<space/>

<wd l="2755" t="13238" r="2899" b="13382">&amp;</wd>

<space/>

<wd l="2995" t="13238" r="3317" b="13411">Liu,</wd>

<space/>

<wd l="3413" t="13243" r="3595" b="13382">Y.</wd>

<space/>

<wd l="3701" t="13238" r="4272" b="13421">(2010).</wd>

<space/>

<wd l="4368" t="13238" r="5525" b="13382">Normalization</wd>

<space/>

<wd l="5621" t="13238" r="5808" b="13382">of</wd>

<space/>

</ln>

<ln l="1646" t="13469" r="5784" b="13651" baseLine="13608" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="13488" r="1944" b="13613">text</wd>

<space/>

<wd l="2006" t="13512" r="2755" b="13651">messages</wd>

<space/>

<wd l="2827" t="13469" r="3058" b="13613">for</wd>

<space/>

<wd l="3115" t="13469" r="4282" b="13651">text-to-speech.</wd>

<space/>

<wd l="4358" t="13469" r="5136" b="13613">Acoustics</wd>

<space/>

<wd l="5213" t="13469" r="5784" b="13651">Speech</wd>

<space/>

</ln>

<ln l="1651" t="13699" r="5779" b="13882" baseLine="13834" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="13699" r="1934" b="13843">and</wd>

<space/>

<wd l="2035" t="13699" r="2530" b="13882">Signal</wd>

<space/>

<wd l="2626" t="13699" r="3490" b="13882">Processing</wd>

<space/>

<wd l="3586" t="13699" r="4440" b="13882">(ICASSP),</wd>

<space/>

<wd l="4536" t="13699" r="4934" b="13843">2010</wd>

<space/>

<wd l="5035" t="13704" r="5458" b="13838">IEEE</wd>

<space/>

<wd l="5558" t="13704" r="5779" b="13838">In-</wd>

</ln>

<ln l="1646" t="13930" r="3758" b="14074" baseLine="14064" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="13930" r="2496" b="14074">ternational</wd>

<space/>

<wd l="2554" t="13930" r="3466" b="14074">Conference</wd>

<space/>

<wd l="3523" t="13973" r="3758" b="14074">on.</wd>

</ln>

</para>

<para l="1426" t="14280" r="5784" b="15379" alignment="justified" li="360" ri="72" spaceBefore="112" fli="-288" lsp="exactly" lspExact="230" language="en">

<ln l="1426" t="14280" r="5784" b="14462" baseLine="14414" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="14280" r="1973" b="14453">Saloot,</wd>

<space/>

<wd l="2035" t="14285" r="2251" b="14424">M.</wd>

<space/>

<wd l="2318" t="14280" r="2554" b="14453">A.,</wd>

<space/>

<wd l="2621" t="14280" r="3024" b="14453">Idris,</wd>

<space/>

<wd l="3082" t="14285" r="3326" b="14453">N.,</wd>

<space/>

<wd l="3394" t="14280" r="3538" b="14424">&amp;</wd>

<space/>

<wd l="3600" t="14280" r="3931" b="14453">Aw,</wd>

<space/>

<wd l="3994" t="14280" r="4176" b="14424">A.</wd>

<space/>

<wd l="4248" t="14280" r="4819" b="14462">(2014).</wd>

<space/>

<wd l="4882" t="14280" r="5362" b="14462">Noisy</wd>

<space/>

<wd l="5419" t="14285" r="5784" b="14424">Text</wd>

<space/>

</ln>

<ln l="1642" t="14510" r="5779" b="14693" baseLine="14645" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="14510" r="2798" b="14654">Normalization</wd>

<space/>

<wd l="2856" t="14510" r="3331" b="14693">Using</wd>

<space/>

<wd l="3398" t="14554" r="3586" b="14654">an</wd>

<space/>

<wd l="3643" t="14510" r="4430" b="14654">Enhanced</wd>

<space/>

<wd l="4493" t="14515" r="5275" b="14693">Language</wd>

<space/>

<wd l="5338" t="14510" r="5779" b="14654">Mod-</wd>

</ln>

<ln l="1651" t="14741" r="5779" b="14923" baseLine="14875" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="14741" r="1829" b="14885">el.</wd>

<space/>

<wd l="1920" t="14746" r="2083" b="14880">In</wd>

<space/>

<wd l="2155" t="14741" r="3125" b="14923">Proceedings</wd>

<space/>

<wd l="3211" t="14741" r="3398" b="14885">of</wd>

<space/>

<wd l="3446" t="14741" r="3686" b="14885">the</wd>

<space/>

<wd l="3768" t="14741" r="4781" b="14885">International</wd>

<space/>

<wd l="4867" t="14741" r="5779" b="14885">Conference</wd>

<space/>

</ln>

<ln l="1651" t="14971" r="5784" b="15154" baseLine="15106" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="15014" r="1848" b="15115">on</wd>

<space/>

<wd l="1930" t="14971" r="2659" b="15115">Artificial</wd>

<space/>

<wd l="2750" t="14971" r="3691" b="15154">Intelligence</wd>

<space/>

<wd l="3782" t="14971" r="4066" b="15115">and</wd>

<space/>

<wd l="4152" t="14976" r="4718" b="15115">Pattern</wd>

<space/>

<wd l="4805" t="14971" r="5784" b="15154">Recognition</wd>

<space/>

</ln>

<ln l="1651" t="15197" r="5669" b="15379" baseLine="15336" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="15197" r="1954" b="15379">(pp.</wd>

<space/>

<wd l="2030" t="15197" r="2818" b="15379">111–122).</wd>

<space/>

<wd l="2880" t="15197" r="3360" b="15341">Kuala</wd>

<space/>

<wd l="3408" t="15202" r="4090" b="15379">Lumpur,</wd>

<space/>

<wd l="4152" t="15197" r="4925" b="15379">Malaysia:</wd>

<space/>

<wd l="4997" t="15197" r="5669" b="15341">SDIWC.</wd>

</ln>

</para>

</column>

<column l="6118" t="1416" r="10520" b="13659">

<para l="6130" t="1459" r="10493" b="2794" alignment="justified" li="216" fli="-216" lsp="exactly" lspExact="230" language="en">

<ln l="6130" t="1459" r="10474" b="1632" baseLine="1598" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="1459" r="6677" b="1632">Saloot,</wd>

<space/>

<wd l="6754" t="1464" r="6970" b="1603">M.</wd>

<space/>

<wd l="7056" t="1459" r="7291" b="1632">A.,</wd>

<space/>

<wd l="7378" t="1459" r="7781" b="1632">Idris,</wd>

<space/>

<wd l="7858" t="1464" r="8098" b="1632">N.,</wd>

<space/>

<wd l="8179" t="1459" r="8510" b="1632">Aw,</wd>

<space/>

<wd l="8592" t="1459" r="8827" b="1632">A.,</wd>

<space/>

<wd l="8914" t="1459" r="9058" b="1603">&amp;</wd>

<space/>

<wd l="9139" t="1459" r="10210" b="1632">Thorleuchter,</wd>

<space/>

<wd l="10291" t="1464" r="10474" b="1603">D.</wd>

<space/>

</ln>

<ln l="6355" t="1690" r="10488" b="1872" baseLine="1829" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="1690" r="6922" b="1872">(2014).</wd>

<space/>

<wd l="6998" t="1690" r="7584" b="1834">Twitter</wd>

<space/>

<wd l="7646" t="1733" r="8170" b="1872">corpus</wd>

<space/>

<wd l="8242" t="1690" r="8923" b="1834">creation:</wd>

<space/>

<wd l="9005" t="1690" r="9307" b="1834">The</wd>

<space/>

<wd l="9379" t="1733" r="9715" b="1834">case</wd>

<space/>

<wd l="9782" t="1690" r="9970" b="1834">of</wd>

<space/>

<wd l="10013" t="1733" r="10099" b="1834">a</wd>

<space/>

<wd l="10157" t="1694" r="10488" b="1834">Ma-</wd>

</ln>

<ln l="6355" t="1920" r="10483" b="2102" baseLine="2054" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="1920" r="6595" b="2102">lay</wd>

<space/>

<wd l="6653" t="1920" r="7834" b="2102">Chat-style-text</wd>

<space/>

<wd l="7891" t="1920" r="8458" b="2102">Corpus</wd>

<space/>

<wd l="8520" t="1920" r="9134" b="2102">(MCC).</wd>

<space/>

<wd l="9197" t="1920" r="9749" b="2102">Digital</wd>

<space/>

<wd l="9816" t="1920" r="10483" b="2064">Scholar-</wd>

</ln>

<ln l="6360" t="2150" r="10493" b="2333" baseLine="2285" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6360" t="2150" r="6677" b="2333">ship</wd>

<space/>

<wd l="6941" t="2150" r="7090" b="2290">in</wd>

<space/>

<wd l="7344" t="2150" r="7584" b="2294">the</wd>

<space/>

<wd l="7843" t="2150" r="8803" b="2294">Humanities.</wd>

<space/>

<wd l="9067" t="2150" r="9845" b="2294">Retrieved</wd>

<space/>

<wd l="10104" t="2150" r="10493" b="2294">from</wd>

<space/>

</ln>

<ln l="6350" t="2381" r="10454" b="2563" baseLine="2515" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="2381" r="10454" b="2563">http://dsh.oxfordjournals.org/content/early/2014/12</wd>

<space/>

</ln>

<ln l="6350" t="2611" r="8150" b="2794" baseLine="2746" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="2611" r="8150" b="2794">/13/llc.fqu066.abstract</wd>

</ln>

</para>

<para l="6130" t="2962" r="10493" b="3797" alignment="justified" li="216" spaceBefore="113" fli="-216" lsp="exactly" lspExact="232" language="en">

<ln l="6130" t="2962" r="10488" b="3144" baseLine="3096" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="2962" r="6677" b="3134">Saloot,</wd>

<space/>

<wd l="6763" t="2966" r="6979" b="3106">M.</wd>

<space/>

<wd l="7075" t="2962" r="7310" b="3134">A.,</wd>

<space/>

<wd l="7402" t="2962" r="7810" b="3134">Idris,</wd>

<space/>

<wd l="7891" t="2966" r="8136" b="3134">N.,</wd>

<space/>

<wd l="8227" t="2962" r="8371" b="3106">&amp;</wd>

<space/>

<wd l="8462" t="2962" r="9226" b="3134">Mahmud,</wd>

<space/>

<wd l="9312" t="2966" r="9485" b="3106">R.</wd>

<space/>

<wd l="9581" t="2962" r="10152" b="3144">(2014).</wd>

<space/>

<wd l="10248" t="2962" r="10488" b="3101">An</wd>

<space/>

</ln>

<ln l="6355" t="3192" r="10483" b="3374" baseLine="3326" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="3192" r="7291" b="3336">architecture</wd>

<space/>

<wd l="7411" t="3192" r="7642" b="3336">for</wd>

<space/>

<wd l="7752" t="3192" r="8261" b="3374">Malay</wd>

<space/>

<wd l="8376" t="3197" r="8875" b="3336">Tweet</wd>

<space/>

<wd l="8986" t="3192" r="10128" b="3336">normalization.</wd>

<space/>

<wd l="10258" t="3197" r="10483" b="3331">In-</wd>

</ln>

<ln l="6355" t="3422" r="10493" b="3605" baseLine="3557" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="3422" r="7138" b="3566">formation</wd>

<space/>

<wd l="7205" t="3422" r="8069" b="3605">Processing</wd>

<space/>

<wd l="8150" t="3422" r="8294" b="3566">&amp;</wd>

<space/>

<wd l="8366" t="3427" r="9456" b="3605">Management,</wd>

<space/>

<wd l="9542" t="3422" r="10013" b="3605">50(5),</wd>

<space/>

<wd l="10094" t="3422" r="10493" b="3566">621–</wd>

<space/>

</ln>

<ln l="6355" t="3653" r="6691" b="3797" baseLine="3787" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="3653" r="6691" b="3797">633.</wd>

</ln>

</para>

<para l="6130" t="4003" r="10483" b="5102" alignment="justified" li="216" spaceBefore="112" fli="-216" lsp="exactly" lspExact="232" language="en">

<ln l="6130" t="4003" r="10474" b="4186" baseLine="4138" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="4003" r="6864" b="4186">Schlippe,</wd>

<space/>

<wd l="6979" t="4008" r="7195" b="4176">T.,</wd>

<space/>

<wd l="7306" t="4003" r="7666" b="4176">Zhu,</wd>

<space/>

<wd l="7786" t="4003" r="8006" b="4176">C.,</wd>

<space/>

<wd l="8122" t="4003" r="8904" b="4176">Gebhardt,</wd>

<space/>

<wd l="9014" t="4008" r="9187" b="4176">J.,</wd>

<space/>

<wd l="9307" t="4003" r="9451" b="4147">&amp;</wd>

<space/>

<wd l="9571" t="4003" r="10200" b="4176">Schultz,</wd>

<space/>

<wd l="10315" t="4008" r="10474" b="4147">T.</wd>

<space/>

</ln>

<ln l="6355" t="4234" r="10483" b="4416" baseLine="4368" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="4234" r="6922" b="4416">(2010).</wd>

<space/>

<wd l="6998" t="4238" r="7363" b="4378">Text</wd>

<space/>

<wd l="7421" t="4234" r="8530" b="4378">normalization</wd>

<space/>

<wd l="8582" t="4234" r="9043" b="4378">based</wd>

<space/>

<wd l="9106" t="4277" r="9302" b="4378">on</wd>

<space/>

<wd l="9370" t="4234" r="10114" b="4378">statistical</wd>

<space/>

<wd l="10176" t="4277" r="10483" b="4378">ma-</wd>

</ln>

<ln l="6355" t="4464" r="10478" b="4646" baseLine="4598" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="4464" r="6778" b="4608">chine</wd>

<space/>

<wd l="6874" t="4464" r="7718" b="4608">translation</wd>

<space/>

<wd l="7810" t="4464" r="8098" b="4608">and</wd>

<space/>

<wd l="8194" t="4464" r="8798" b="4608">internet</wd>

<space/>

<wd l="8890" t="4507" r="9226" b="4608">user</wd>

<space/>

<wd l="9322" t="4483" r="9955" b="4646">support.</wd>

<space/>

<wd l="10061" t="4469" r="10224" b="4603">In</wd>

<space/>

<wd l="10320" t="4469" r="10478" b="4608">T.</wd>

<space/>

</ln>

<ln l="6350" t="4694" r="10483" b="4877" baseLine="4829" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="4694" r="7243" b="4877">Kobayashi,</wd>

<space/>

<wd l="7325" t="4699" r="7507" b="4838">K.</wd>

<space/>

<wd l="7589" t="4694" r="8165" b="4867">Hirose,</wd>

<space/>

<wd l="8251" t="4694" r="8395" b="4838">&amp;</wd>

<space/>

<wd l="8482" t="4694" r="8621" b="4838">S.</wd>

<space/>

<wd l="8698" t="4694" r="9533" b="4838">Nakamura</wd>

<space/>

<wd l="9610" t="4694" r="10133" b="4877">(Eds.),</wd>

<space/>

<wd l="10214" t="4699" r="10483" b="4838">IN-</wd>

</ln>

<ln l="6355" t="4920" r="9446" b="5102" baseLine="5059" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="4920" r="7474" b="5064">TERSPEECH</wd>

<space/>

<wd l="7526" t="4920" r="7829" b="5102">(pp.</wd>

<space/>

<wd l="7910" t="4920" r="8894" b="5102">1816–1819).</wd>

<space/>

<wd l="8962" t="4920" r="9446" b="5064">ISCA.</wd>

</ln>

</para>

<para l="6130" t="5270" r="10488" b="6374" alignment="justified" li="216" spaceBefore="112" fli="-216" lsp="exactly" lspExact="232" language="en">

<ln l="6130" t="5270" r="10474" b="5453" baseLine="5410" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="5270" r="6638" b="5443">Smith,</wd>

<space/>

<wd l="6715" t="5275" r="6888" b="5443">J.,</wd>

<space/>

<wd l="6970" t="5270" r="7114" b="5414">&amp;</wd>

<space/>

<wd l="7190" t="5270" r="7589" b="5443">Padi,</wd>

<space/>

<wd l="7666" t="5275" r="7819" b="5414">P.</wd>

<space/>

<wd l="7906" t="5270" r="8472" b="5453">(2006).</wd>

<space/>

<wd l="8549" t="5275" r="8885" b="5414">Lets</wd>

<space/>

<wd l="8962" t="5270" r="9394" b="5414">make</wd>

<space/>

<wd l="9470" t="5314" r="9557" b="5414">a</wd>

<space/>

<wd l="9629" t="5270" r="10474" b="5453">dictionary.</wd>

<space/>

</ln>

<ln l="6355" t="5501" r="10483" b="5683" baseLine="5640" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="5506" r="6518" b="5640">In</wd>

<space/>

<wd l="6586" t="5501" r="7560" b="5683">Proceedings</wd>

<space/>

<wd l="7642" t="5501" r="7829" b="5645">of</wd>

<space/>

<wd l="7877" t="5501" r="8117" b="5645">the</wd>

<space/>

<wd l="8194" t="5501" r="8434" b="5645">the</wd>

<space/>

<wd l="8506" t="5501" r="9043" b="5683">Eighth</wd>

<space/>

<wd l="9115" t="5501" r="9787" b="5645">Biennial</wd>

<space/>

<wd l="9869" t="5501" r="10483" b="5645">Confer-</wd>

</ln>

<ln l="6355" t="5731" r="10478" b="5914" baseLine="5870" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="5774" r="6710" b="5875">ence</wd>

<space/>

<wd l="6797" t="5731" r="6984" b="5875">of</wd>

<space/>

<wd l="7037" t="5731" r="7282" b="5875">the</wd>

<space/>

<wd l="7363" t="5736" r="7949" b="5875">Borneo</wd>

<space/>

<wd l="8035" t="5731" r="8770" b="5875">Research</wd>

<space/>

<wd l="8851" t="5731" r="9475" b="5875">Council</wd>

<space/>

<wd l="9562" t="5731" r="10090" b="5914">(BRC)</wd>

<space/>

<wd l="10176" t="5731" r="10478" b="5914">(pp.</wd>

<space/>

</ln>

<ln l="6355" t="5962" r="10488" b="6144" baseLine="6101" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="5962" r="7157" b="6144">515–520).</wd>

<space/>

<wd l="7296" t="5962" r="8011" b="6134">Sarawak,</wd>

<space/>

<wd l="8141" t="5962" r="8918" b="6144">Malaysia:</wd>

<space/>

<wd l="9048" t="5966" r="9634" b="6106">Borneo</wd>

<space/>

<wd l="9754" t="5962" r="10488" b="6106">Research</wd>

<space/>

</ln>

<ln l="6355" t="6192" r="7603" b="6374" baseLine="6331" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="6192" r="6979" b="6336">Council</wd>

<space/>

<wd l="7037" t="6192" r="7603" b="6374">(BRC).</wd>

</ln>

</para>

<para l="6130" t="6542" r="10512" b="7416" alignment="justified" li="216" spaceBefore="109" fli="-216" lsp="exactly" lspExact="232" language="en">

<ln l="6130" t="6542" r="10483" b="6725" baseLine="6677" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="6542" r="6686" b="6725">Sproat,</wd>

<space/>

<wd l="6754" t="6547" r="6979" b="6715">R.,</wd>

<space/>

<wd l="7046" t="6542" r="7555" b="6715">Black,</wd>

<space/>

<wd l="7622" t="6542" r="7800" b="6686">A.</wd>

<space/>

<wd l="7872" t="6547" r="8155" b="6715">W.,</wd>

<space/>

<wd l="8227" t="6542" r="8683" b="6715">Chen,</wd>

<space/>

<wd l="8760" t="6542" r="8952" b="6715">S.,</wd>

<space/>

<wd l="9019" t="6547" r="9614" b="6715">Kumar,</wd>

<space/>

<wd l="9691" t="6542" r="9888" b="6715">S.,</wd>

<space/>

<wd l="9960" t="6542" r="10483" b="6686">Osten-</wd>

</ln>

<ln l="6355" t="6773" r="10512" b="6955" baseLine="6907" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="6773" r="6725" b="6946">dorf,</wd>

<space/>

<wd l="6802" t="6778" r="7070" b="6946">M.,</wd>

<space/>

<wd l="7152" t="6773" r="7296" b="6917">&amp;</wd>

<space/>

<wd l="7368" t="6773" r="8122" b="6946">Richards,</wd>

<space/>

<wd l="8198" t="6773" r="8366" b="6917">C.</wd>

<space/>

<wd l="8448" t="6773" r="9019" b="6955">(2001).</wd>

<space/>

<wd l="9096" t="6773" r="10253" b="6917">Normalization</wd>

<space/>

<wd l="10320" t="6773" r="10512" b="6917">of</wd>

<space/>

</ln>

<ln l="6350" t="7003" r="10483" b="7186" baseLine="7138" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="7003" r="7392" b="7147">non-standard</wd>

<space/>

<wd l="7526" t="7003" r="8050" b="7147">words.</wd>

<space/>

<wd l="8194" t="7003" r="8995" b="7186">Computer</wd>

<space/>

<wd l="9130" t="7003" r="9696" b="7186">Speech</wd>

<space/>

<wd l="9830" t="7003" r="9974" b="7147">&amp;</wd>

<space/>

<wd l="10109" t="7008" r="10483" b="7147">Lan-</wd>

</ln>

<ln l="6355" t="7234" r="8198" b="7416" baseLine="7368" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="7277" r="6869" b="7416">guage,</wd>

<space/>

<wd l="6946" t="7234" r="7406" b="7416">15(3),</wd>

<space/>

<wd l="7464" t="7234" r="8198" b="7378">287–333.</wd>

</ln>

</para>

<para l="6130" t="7584" r="10483" b="8453" alignment="justified" li="216" spaceBefore="114" fli="-216" lsp="exactly" lspExact="232" language="en">

<ln l="6130" t="7584" r="10483" b="7766" baseLine="7718" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="7584" r="6763" b="7757">Stolcke,</wd>

<space/>

<wd l="6878" t="7584" r="7061" b="7728">A.</wd>

<space/>

<wd l="7186" t="7584" r="7757" b="7766">(2002).</wd>

<space/>

<wd l="7882" t="7584" r="8741" b="7728">SRILM-an</wd>

<space/>

<wd l="8851" t="7584" r="9653" b="7728">extensible</wd>

<space/>

<wd l="9768" t="7584" r="10483" b="7766">language</wd>

<space/>

</ln>

<ln l="6350" t="7810" r="10483" b="7992" baseLine="7949" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="7810" r="7099" b="7992">modeling</wd>

<space/>

<wd l="7262" t="7810" r="7824" b="7954">toolkit.</wd>

<space/>

<wd l="8002" t="7814" r="8165" b="7949">In</wd>

<space/>

<wd l="8323" t="7810" r="9298" b="7992">Proceedings</wd>

<space/>

<wd l="9470" t="7810" r="10483" b="7954">International</wd>

<space/>

</ln>

<ln l="6355" t="8040" r="10478" b="8222" baseLine="8179" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="8040" r="7267" b="8184">Conference</wd>

<space/>

<wd l="7363" t="8083" r="7560" b="8184">on</wd>

<space/>

<wd l="7656" t="8040" r="8251" b="8222">Spoken</wd>

<space/>

<wd l="8338" t="8045" r="9120" b="8222">Language</wd>

<space/>

<wd l="9211" t="8040" r="10075" b="8222">Processing</wd>

<space/>

<wd l="10176" t="8040" r="10478" b="8222">(pp.</wd>

<space/>

</ln>

<ln l="6350" t="8270" r="7157" b="8453" baseLine="8410" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="8270" r="7157" b="8453">257–286).</wd>

</ln>

</para>

<para l="6130" t="8621" r="10493" b="9456" alignment="justified" li="216" spaceBefore="108" fli="-216" lsp="exactly" lspExact="232" language="en">

<ln l="6130" t="8621" r="10493" b="8803" baseLine="8760" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="8621" r="6595" b="8794">Streit,</wd>

<space/>

<wd l="6686" t="8626" r="6859" b="8765">R.</wd>

<space/>

<wd l="6955" t="8626" r="7171" b="8794">L.,</wd>

<space/>

<wd l="7267" t="8621" r="7411" b="8765">&amp;</wd>

<space/>

<wd l="7502" t="8621" r="8376" b="8803">Luginbuhl,</wd>

<space/>

<wd l="8477" t="8626" r="8635" b="8765">T.</wd>

<space/>

<wd l="8731" t="8626" r="8890" b="8765">E.</wd>

<space/>

<wd l="8990" t="8621" r="9557" b="8803">(1994).</wd>

<space/>

<wd l="9653" t="8621" r="10493" b="8765">Maximum</wd>

<space/>

</ln>

<ln l="6355" t="8851" r="10474" b="9034" baseLine="8990" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="8851" r="7162" b="8995">likelihood</wd>

<space/>

<wd l="7210" t="8851" r="7829" b="9034">training</wd>

<space/>

<wd l="7886" t="8851" r="8074" b="8995">of</wd>

<space/>

<wd l="8093" t="8851" r="9096" b="9034">probabilistic</wd>

<space/>

<wd l="9154" t="8851" r="9648" b="8995">neural</wd>

<space/>

<wd l="9701" t="8851" r="10474" b="8995">networks.</wd>

<space/>

</ln>

<ln l="6346" t="9082" r="10483" b="9264" baseLine="9221" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6346" t="9082" r="6888" b="9226">Neural</wd>

<space/>

<wd l="7022" t="9082" r="7848" b="9254">Networks,</wd>

<space/>

<wd l="7997" t="9086" r="8424" b="9221">IEEE</wd>

<space/>

<wd l="8568" t="9082" r="9576" b="9226">Transactions</wd>

<space/>

<wd l="9720" t="9125" r="9960" b="9254">on,</wd>

<space/>

<wd l="10114" t="9082" r="10483" b="9264">5(5),</wd>

<space/>

</ln>

<ln l="6355" t="9312" r="9010" b="9456" baseLine="9446" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="9312" r="7090" b="9456">764–783.</wd>

<space/>

<wd l="7157" t="9312" r="9010" b="9456">doi:10.1109/72.317728</wd>

</ln>

</para>

<para l="6125" t="9662" r="10483" b="10306" alignment="justified" li="216" spaceBefore="116" fli="-216" lsp="exactly" lspExact="232" language="en">

<ln l="6125" t="9662" r="10478" b="9845" baseLine="9797" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="9662" r="6850" b="9835">Thurlow,</wd>

<space/>

<wd l="6950" t="9662" r="7171" b="9835">C.,</wd>

<space/>

<wd l="7272" t="9662" r="7416" b="9806">&amp;</wd>

<space/>

<wd l="7507" t="9667" r="8098" b="9835">Brown,</wd>

<space/>

<wd l="8194" t="9662" r="8371" b="9806">A.</wd>

<space/>

<wd l="8477" t="9662" r="9048" b="9845">(2003).</wd>

<space/>

<wd l="9149" t="9662" r="10032" b="9806">Generation</wd>

<space/>

<wd l="10123" t="9662" r="10478" b="9806">Txt?</wd>

<space/>

</ln>

<ln l="6355" t="9893" r="10483" b="10075" baseLine="10027" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="9893" r="6658" b="10037">The</wd>

<space/>

<wd l="6845" t="9893" r="8074" b="10075">sociolinguistics</wd>

<space/>

<wd l="8256" t="9893" r="8443" b="10037">of</wd>

<space/>

<wd l="8597" t="9936" r="9091" b="10075">young</wd>

<space/>

<wd l="9264" t="9893" r="9941" b="10075">people’s</wd>

<space/>

<wd l="10118" t="9912" r="10483" b="10037">text-</wd>

</ln>

<ln l="6350" t="10123" r="7229" b="10306" baseLine="10258" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="10123" r="7229" b="10306">messaging.</wd>

</ln>

</para>

<para l="6125" t="10474" r="10493" b="11803" alignment="justified" li="216" spaceBefore="110" fli="-216" lsp="exactly" lspExact="232" language="en">

<ln l="6125" t="10474" r="10488" b="10656" baseLine="10608" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="10478" r="7018" b="10646">Toutanova,</wd>

<space/>

<wd l="7085" t="10478" r="7320" b="10646">K.,</wd>

<space/>

<wd l="7392" t="10474" r="7536" b="10618">&amp;</wd>

<space/>

<wd l="7598" t="10478" r="8174" b="10646">Moore,</wd>

<space/>

<wd l="8242" t="10478" r="8410" b="10618">R.</wd>

<space/>

<wd l="8486" t="10474" r="8654" b="10618">C.</wd>

<space/>

<wd l="8726" t="10474" r="9298" b="10656">(2002).</wd>

<space/>

<wd l="9365" t="10474" r="10488" b="10618">Pronunciation</wd>

<space/>

</ln>

<ln l="6350" t="10704" r="10488" b="10886" baseLine="10838" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="10704" r="7123" b="10886">Modeling</wd>

<space/>

<wd l="7253" t="10704" r="7488" b="10848">for</wd>

<space/>

<wd l="7608" t="10704" r="8381" b="10886">Improved</wd>

<space/>

<wd l="8515" t="10704" r="9168" b="10886">Spelling</wd>

<space/>

<wd l="9298" t="10704" r="10186" b="10848">Correction.</wd>

<space/>

<wd l="10325" t="10709" r="10488" b="10843">In</wd>

<space/>

</ln>

<ln l="6350" t="10930" r="10483" b="11112" baseLine="11069" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="10930" r="7320" b="11112">Proceedings</wd>

<space/>

<wd l="7397" t="10930" r="7584" b="11074">of</wd>

<space/>

<wd l="7627" t="10930" r="7867" b="11074">the</wd>

<space/>

<wd l="7939" t="10930" r="8294" b="11074">40th</wd>

<space/>

<wd l="8362" t="10930" r="8947" b="11074">Annual</wd>

<space/>

<wd l="9019" t="10930" r="9682" b="11112">Meeting</wd>

<space/>

<wd l="9758" t="10973" r="9955" b="11074">on</wd>

<space/>

<wd l="10022" t="10930" r="10483" b="11074">Asso-</wd>

</ln>

<ln l="6355" t="11160" r="10493" b="11342" baseLine="11299" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="11160" r="6893" b="11304">ciation</wd>

<space/>

<wd l="7013" t="11160" r="7243" b="11304">for</wd>

<space/>

<wd l="7358" t="11160" r="8539" b="11342">Computational</wd>

<space/>

<wd l="8654" t="11160" r="9538" b="11342">Linguistics</wd>

<space/>

<wd l="9662" t="11160" r="9965" b="11342">(pp.</wd>

<space/>

<wd l="10109" t="11160" r="10493" b="11299">144–</wd>

<space/>

</ln>

<ln l="6370" t="11390" r="10493" b="11573" baseLine="11530" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6370" t="11390" r="6758" b="11573">151).</wd>

<space/>

<wd l="6926" t="11390" r="7934" b="11573">Stroudsburg,</wd>

<space/>

<wd l="8093" t="11390" r="8390" b="11563">PA,</wd>

<space/>

<wd l="8549" t="11390" r="8995" b="11534">USA:</wd>

<space/>

<wd l="9158" t="11390" r="10104" b="11534">Association</wd>

<space/>

<wd l="10258" t="11390" r="10493" b="11534">for</wd>

<space/>

</ln>

<ln l="6355" t="11621" r="8515" b="11803" baseLine="11760" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="11621" r="7531" b="11803">Computational</wd>

<space/>

<wd l="7589" t="11621" r="8515" b="11803">Linguistics.</wd>

</ln>

</para>

<para l="6120" t="11971" r="10483" b="12614" alignment="justified" li="216" spaceBefore="110" fli="-216" lsp="exactly" lspExact="232" language="en">

<ln l="6120" t="11971" r="10483" b="12154" baseLine="12110" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="11976" r="6494" b="12144">Xue,</wd>

<space/>

<wd l="6566" t="11976" r="6782" b="12144">Z.,</wd>

<space/>

<wd l="6850" t="11971" r="7195" b="12144">Yin,</wd>

<space/>

<wd l="7262" t="11976" r="7502" b="12144">D.,</wd>

<space/>

<wd l="7579" t="11971" r="7723" b="12115">&amp;</wd>

<space/>

<wd l="7790" t="11971" r="8501" b="12144">Davison,</wd>

<space/>

<wd l="8573" t="11976" r="8746" b="12115">B.</wd>

<space/>

<wd l="8818" t="11976" r="9000" b="12115">D.</wd>

<space/>

<wd l="9082" t="11971" r="9739" b="12154">(2011a).</wd>

<space/>

<wd l="9806" t="11971" r="10483" b="12115">Normal-</wd>

</ln>

<ln l="6355" t="12202" r="10483" b="12384" baseLine="12336" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="12202" r="6744" b="12384">izing</wd>

<space/>

<wd l="6802" t="12202" r="7627" b="12346">Microtext.</wd>

<space/>

<wd l="7699" t="12206" r="7862" b="12341">In</wd>

<space/>

<wd l="7920" t="12202" r="8746" b="12384">Analyzing</wd>

<space/>

<wd l="8808" t="12202" r="9595" b="12346">Microtext</wd>

<space/>

<wd l="9653" t="12202" r="10056" b="12384">(Vol.</wd>

<space/>

<wd l="10123" t="12202" r="10483" b="12346">WS-</wd>

</ln>

<ln l="6370" t="12432" r="7550" b="12614" baseLine="12566" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6370" t="12432" r="6955" b="12614">11–05).</wd>

<space/>

<wd l="7018" t="12432" r="7550" b="12576">AAAI.</wd>

</ln>

</para>

<para l="6120" t="12782" r="10483" b="13646" alignment="justified" li="216" spaceBefore="114" fli="-216" lsp="exactly" lspExact="230" language="en">

<ln l="6120" t="12782" r="10478" b="12965" baseLine="12917" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="12787" r="6494" b="12955">Xue,</wd>

<space/>

<wd l="6566" t="12787" r="6778" b="12955">Z.,</wd>

<space/>

<wd l="6850" t="12782" r="7190" b="12955">Yin,</wd>

<space/>

<wd l="7262" t="12787" r="7498" b="12955">D.,</wd>

<space/>

<wd l="7574" t="12782" r="7718" b="12926">&amp;</wd>

<space/>

<wd l="7786" t="12782" r="8491" b="12955">Davison,</wd>

<space/>

<wd l="8563" t="12787" r="8736" b="12926">B.</wd>

<space/>

<wd l="8808" t="12787" r="8990" b="12926">D.</wd>

<space/>

<wd l="9072" t="12782" r="9734" b="12965">(2011b).</wd>

<space/>

<wd l="9806" t="12782" r="10478" b="12926">Normal-</wd>

</ln>

<ln l="6355" t="13013" r="10483" b="13195" baseLine="13147" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="13013" r="6744" b="13195">izing</wd>

<space/>

<wd l="6854" t="13013" r="7680" b="13157">Microtext.</wd>

<space/>

<wd l="7800" t="13018" r="7963" b="13152">In</wd>

<space/>

<wd l="8069" t="13013" r="8899" b="13195">Analyzing</wd>

<space/>

<wd l="9005" t="13013" r="9835" b="13157">Microtext:</wd>

<space/>

<wd l="9955" t="13018" r="10483" b="13195">Papers</wd>

<space/>

</ln>

<ln l="6355" t="13243" r="10483" b="13426" baseLine="13378" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="13243" r="6744" b="13387">from</wd>

<space/>

<wd l="6811" t="13243" r="7056" b="13387">the</wd>

<space/>

<wd l="7133" t="13243" r="7512" b="13387">2011</wd>

<space/>

<wd l="7613" t="13243" r="8107" b="13382">AAAI</wd>

<space/>

<wd l="8189" t="13243" r="9019" b="13426">Workshop</wd>

<space/>

<wd l="9106" t="13243" r="9408" b="13426">(pp.</wd>

<space/>

<wd l="9499" t="13243" r="10099" b="13426">74–79).</wd>

<space/>

<wd l="10200" t="13243" r="10483" b="13387">San</wd>

<space/>

</ln>

<ln l="6350" t="13474" r="8645" b="13646" baseLine="13608" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="13474" r="7166" b="13646">Francisco,</wd>

<space/>

<wd l="7229" t="13474" r="7546" b="13646">CA,</wd>

<space/>

<wd l="7603" t="13474" r="8045" b="13618">USA:</wd>

<space/>

<wd l="8112" t="13474" r="8645" b="13618">AAAI.</wd>

</ln>

</para>

</column>

</section>

<dd l="5771" t="15736" r="6176" b="15977">

<para l="5804" t="15792" r="6143" b="15946" alignment="left" spaceBefore="4" lsp="exactly" lspExact="229" language="en">

<ln l="5870" t="15792" r="6077" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="24">

<wd l="5870" t="15792" r="6077" b="15946">27</wd>

</ln>

</para>

</dd>

</body>

</page>

</document>

