<?xml version="1.0" encoding="UTF-16"?>

<!--XML document generated using OCR technology from Nuance Communications, Inc.-->

<document xmlns="http://www.scansoft.com/omnipage/xml/ssdoc-schema3.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4316.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1435" marginTop="1360" marginRight="1378" marginBottom="358" offsetX="-26" offsetY="-18" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1435" t="1360" r="10531" b="2066">

<column l="1435" t="1360" r="10531" b="2066">

<para l="2434" t="1435" r="9523" b="1699" alignment="centered" spaceBefore="19" spaceAfter="348" lsp="exactly" lspExact="332" language="en">

<ln l="2434" t="1435" r="9523" b="1699" baseLine="1632" bold="true" underlined="none" subsuperscript="none" fontSize="1450" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="2434" t="1440" r="3086" b="1642">IITP:</wd>

<space/>

<wd l="3187" t="1435" r="4070" b="1699">Hybrid</wd>

<space/>

<wd l="4147" t="1435" r="5357" b="1699">Approach</wd>

<space/>

<wd l="5434" t="1435" r="5794" b="1642">for</wd>

<space/>

<wd l="5870" t="1440" r="6398" b="1642">Text</wd>

<space/>

<wd l="6470" t="1435" r="8237" b="1646">Normalization</wd>

<space/>

<wd l="8309" t="1435" r="8549" b="1637">in</wd>

<space/>

<wd l="8626" t="1435" r="9523" b="1642">Twitter</wd>

</ln>

</para>

</column>

</section>

<section l="1435" t="2066" r="10531" b="4092">

<column l="1435" t="2066" r="10531" b="4092">

<para l="3101" t="2112" r="8904" b="3446" alignment="centered" spaceAfter="643" lsp="exactly" lspExact="276" language="en">

<ln l="3202" t="2112" r="8746" b="2333" baseLine="2280" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Courier New" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="3202" t="2117" r="3557" b="2285">Md</wd>

<space/>

<wd l="3624" t="2112" r="4133" b="2285">Shad</wd>

<space/>

<wd l="4200" t="2112" r="4978" b="2323">Akhtar,</wd>

<space/>

<wd l="5040" t="2117" r="5606" b="2333">Utpal</wd>

<space/>

<wd l="5674" t="2117" r="6408" b="2285">Kumar</wd>

<space/>

<wd l="6475" t="2112" r="7162" b="2285">Sikdar</wd>

<space/>

<wd l="7224" t="2117" r="7603" b="2285">and</wd>

<space/>

<wd l="7666" t="2112" r="8093" b="2285">Asif</wd>

<space/>

<wd l="8136" t="2117" r="8746" b="2285">Ekbal
</wd>

</ln>

<ln l="3888" t="2395" r="8059" b="2616" baseLine="2558" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Courier New" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="3888" t="2405" r="4354" b="2616">Dept</wd>

<space/>

<wd l="4416" t="2400" r="4627" b="2568">of</wd>

<space/>

<wd l="4675" t="2400" r="5630" b="2616">Computer</wd>

<space/>

<wd l="5698" t="2400" r="6432" b="2568">Science</wd>

<space/>

<wd l="6494" t="2400" r="6835" b="2568">and</wd>

<space/>

<wd l="6893" t="2400" r="8059" b="2616">Engineering
</wd>

</ln>

<ln l="5530" t="2674" r="6427" b="2846" baseLine="2837" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Courier New" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="5530" t="2683" r="5832" b="2842">IIT</wd>

<space/>

<wd l="5894" t="2683" r="6427" b="2846">Patna
</wd>

</ln>

<ln l="5405" t="2957" r="6547" b="3154" baseLine="3115" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Courier New" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="5405" t="2962" r="5981" b="3154">Patna,</wd>

<space/>

<wd l="6053" t="2957" r="6547" b="3125">India
</wd>

</ln>

<ln l="3101" t="3245" r="8904" b="3446" baseLine="3394" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Courier New" fontFamily="modern" fontPitch="fixed" spacing="0" forcedEOF="true">

<wd l="3101" t="3245" r="8904" b="3446">(shad.pcs15,utpal.sikdar,asif)@iitp.ac.in</wd>

</ln>

</para>

</column>

</section>

<section l="1435" t="4092" r="10531" b="15317">

<column l="1435" t="4092" r="5822" b="15317">

<para l="3178" t="4142" r="4070" b="4315" alignment="centered" spaceBefore="3" lsp="exactly" lspExact="269" language="en">

<ln l="3178" t="4142" r="4070" b="4315" baseLine="4306" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="3178" t="4142" r="4070" b="4315">Abstract</wd>

</ln>

</para>

<para l="1781" t="4661" r="5477" b="8880" alignment="justified" li="216" ri="360" spaceBefore="233" lsp="exactly" lspExact="271" language="en">

<ln l="1781" t="4661" r="5453" b="4862" baseLine="4810" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1781" t="4666" r="1963" b="4814">In</wd>

<space/>

<wd l="2035" t="4661" r="2342" b="4819">this</wd>

<space/>

<wd l="2424" t="4714" r="2909" b="4862">paper</wd>

<space/>

<wd l="2981" t="4714" r="3230" b="4819">we</wd>

<space/>

<wd l="3307" t="4685" r="3826" b="4862">report</wd>

<space/>

<wd l="3902" t="4714" r="4195" b="4819">our</wd>

<space/>

<wd l="4262" t="4661" r="4709" b="4819">work</wd>

<space/>

<wd l="4781" t="4661" r="5040" b="4819">for</wd>

<space/>

<wd l="5107" t="4714" r="5453" b="4819">nor-</wd>

</ln>

<ln l="1781" t="4930" r="5453" b="5131" baseLine="5078" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1781" t="4930" r="2702" b="5088">malization</wd>

<space/>

<wd l="2803" t="4930" r="2990" b="5088">of</wd>

<space/>

<wd l="3072" t="4930" r="3542" b="5131">noisy</wd>

<space/>

<wd l="3643" t="4954" r="3970" b="5088">text</wd>

<space/>

<wd l="4061" t="4930" r="4234" b="5083">in</wd>

<space/>

<wd l="4325" t="4930" r="4954" b="5088">Twitter</wd>

<space/>

<wd l="5054" t="4930" r="5453" b="5088">data.</wd>

<space/>

</ln>

<ln l="1781" t="5203" r="5453" b="5405" baseLine="5352" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1781" t="5203" r="2117" b="5362">The</wd>

<space/>

<wd l="2203" t="5203" r="2861" b="5362">method</wd>

<space/>

<wd l="2942" t="5256" r="3192" b="5362">we</wd>

<space/>

<wd l="3278" t="5256" r="3970" b="5405">propose</wd>

<space/>

<wd l="4051" t="5203" r="4190" b="5362">is</wd>

<space/>

<wd l="4282" t="5203" r="4853" b="5405">hybrid</wd>

<space/>

<wd l="4934" t="5203" r="5102" b="5357">in</wd>

<space/>

<wd l="5189" t="5256" r="5453" b="5362">na-</wd>

</ln>

<ln l="1781" t="5472" r="5467" b="5674" baseLine="5621" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1781" t="5496" r="2117" b="5630">ture</wd>

<space/>

<wd l="2189" t="5472" r="2515" b="5630">that</wd>

<space/>

<wd l="2587" t="5472" r="3413" b="5630">combines</wd>

<space/>

<wd l="3485" t="5472" r="4224" b="5630">machine</wd>

<space/>

<wd l="4296" t="5472" r="5006" b="5674">learning</wd>

<space/>

<wd l="5078" t="5472" r="5467" b="5630">with</wd>

<space/>

</ln>

<ln l="1781" t="5741" r="5453" b="5942" baseLine="5894" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1781" t="5741" r="2246" b="5899">rules.</wd>

<space/>

<wd l="2496" t="5746" r="2678" b="5894">In</wd>

<space/>

<wd l="2784" t="5741" r="3053" b="5899">the</wd>

<space/>

<wd l="3168" t="5741" r="3504" b="5899">first</wd>

<space/>

<wd l="3624" t="5765" r="4008" b="5942">step,</wd>

<space/>

<wd l="4152" t="5741" r="5078" b="5942">supervised</wd>

<space/>

<wd l="5194" t="5794" r="5453" b="5942">ap-</wd>

</ln>

<ln l="1781" t="6014" r="5467" b="6216" baseLine="6163" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1781" t="6014" r="2376" b="6216">proach</wd>

<space/>

<wd l="2443" t="6014" r="2942" b="6173">based</wd>

<space/>

<wd l="3010" t="6067" r="3226" b="6173">on</wd>

<space/>

<wd l="3298" t="6014" r="4272" b="6173">conditional</wd>

<space/>

<wd l="4344" t="6014" r="5011" b="6173">random</wd>

<space/>

<wd l="5083" t="6014" r="5467" b="6173">field</wd>

<space/>

</ln>

<ln l="1781" t="6283" r="5467" b="6485" baseLine="6437" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1781" t="6283" r="1920" b="6442">is</wd>

<space/>

<wd l="1997" t="6283" r="2923" b="6485">developed,</wd>

<space/>

<wd l="3014" t="6283" r="3326" b="6442">and</wd>

<space/>

<wd l="3394" t="6283" r="3562" b="6437">in</wd>

<space/>

<wd l="3629" t="6283" r="3893" b="6442">the</wd>

<space/>

<wd l="3974" t="6283" r="4570" b="6442">second</wd>

<space/>

<wd l="4646" t="6307" r="4987" b="6485">step</wd>

<space/>

<wd l="5064" t="6336" r="5160" b="6442">a</wd>

<space/>

<wd l="5232" t="6307" r="5467" b="6442">set</wd>

<space/>

</ln>

<ln l="1786" t="6557" r="5453" b="6758" baseLine="6706" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1786" t="6557" r="1978" b="6715">of</wd>

<space/>

<wd l="2054" t="6557" r="2880" b="6715">heuristics</wd>

<space/>

<wd l="2981" t="6557" r="3398" b="6715">rules</wd>

<space/>

<wd l="3499" t="6557" r="3634" b="6715">is</wd>

<space/>

<wd l="3739" t="6557" r="4378" b="6758">applied</wd>

<space/>

<wd l="4469" t="6581" r="4637" b="6715">to</wd>

<space/>

<wd l="4733" t="6557" r="4997" b="6715">the</wd>

<space/>

<wd l="5093" t="6610" r="5453" b="6715">can-</wd>

</ln>

<ln l="1786" t="6826" r="5453" b="6984" baseLine="6979" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1786" t="6826" r="2314" b="6984">didate</wd>

<space/>

<wd l="2414" t="6826" r="3365" b="6984">wordforms</wd>

<space/>

<wd l="3475" t="6826" r="3730" b="6984">for</wd>

<space/>

<wd l="3830" t="6826" r="4094" b="6984">the</wd>

<space/>

<wd l="4200" t="6826" r="5453" b="6984">normalization.</wd>

<space/>

</ln>

<ln l="1781" t="7099" r="5453" b="7258" baseLine="7248" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1781" t="7099" r="2117" b="7258">The</wd>

<space/>

<wd l="2194" t="7099" r="2971" b="7258">classifier</wd>

<space/>

<wd l="3038" t="7099" r="3178" b="7258">is</wd>

<space/>

<wd l="3254" t="7099" r="3864" b="7258">trained</wd>

<space/>

<wd l="3931" t="7099" r="4320" b="7258">with</wd>

<space/>

<wd l="4397" t="7152" r="4493" b="7258">a</wd>

<space/>

<wd l="4570" t="7123" r="4805" b="7258">set</wd>

<space/>

<wd l="4877" t="7099" r="5069" b="7258">of</wd>

<space/>

<wd l="5126" t="7099" r="5453" b="7258">fea-</wd>

</ln>

<ln l="1781" t="7368" r="5462" b="7526" baseLine="7522" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1781" t="7392" r="2198" b="7526">tures</wd>

<space/>

<wd l="2280" t="7368" r="2818" b="7526">which</wd>

<space/>

<wd l="2890" t="7421" r="3312" b="7526">were</wd>

<space/>

<wd l="3394" t="7421" r="3658" b="7526">are</wd>

<space/>

<wd l="3739" t="7368" r="4382" b="7526">derived</wd>

<space/>

<wd l="4454" t="7368" r="5126" b="7526">without</wd>

<space/>

<wd l="5198" t="7368" r="5462" b="7526">the</wd>

<space/>

</ln>

<ln l="1781" t="7642" r="5467" b="7843" baseLine="7790" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1781" t="7694" r="2069" b="7800">use</wd>

<space/>

<wd l="2146" t="7642" r="2338" b="7800">of</wd>

<space/>

<wd l="2395" t="7694" r="2698" b="7843">any</wd>

<space/>

<wd l="2774" t="7642" r="4157" b="7843">domain-specific</wd>

<space/>

<wd l="4234" t="7642" r="4838" b="7800">feature</wd>

<space/>

<wd l="4910" t="7642" r="5467" b="7800">and/or</wd>

<space/>

</ln>

<ln l="1781" t="7910" r="5462" b="8112" baseLine="8064" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1781" t="7963" r="2563" b="8069">resource.</wd>

<space/>

<wd l="2760" t="7910" r="3096" b="8069">The</wd>

<space/>

<wd l="3197" t="7910" r="3787" b="8069">overall</wd>

<space/>

<wd l="3893" t="7934" r="4493" b="8112">system</wd>

<space/>

<wd l="4584" t="7910" r="5098" b="8112">yields</wd>

<space/>

<wd l="5198" t="7910" r="5462" b="8069">the</wd>

<space/>

</ln>

<ln l="1781" t="8184" r="5477" b="8386" baseLine="8333" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1781" t="8184" r="2626" b="8386">precision,</wd>

<space/>

<wd l="2712" t="8184" r="3197" b="8342">recall</wd>

<space/>

<wd l="3278" t="8184" r="3590" b="8342">and</wd>

<space/>

<wd l="3662" t="8189" r="4584" b="8342">F-measure</wd>

<space/>

<wd l="4656" t="8184" r="5203" b="8342">values</wd>

<space/>

<wd l="5290" t="8184" r="5477" b="8342">of</wd>

<space/>

</ln>

<ln l="1786" t="8453" r="5462" b="8654" baseLine="8602" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1786" t="8453" r="2496" b="8640">90.26%,</wd>

<space/>

<wd l="2573" t="8458" r="3235" b="8611">71.91%</wd>

<space/>

<wd l="3312" t="8453" r="3624" b="8611">and</wd>

<space/>

<wd l="3696" t="8453" r="4349" b="8611">80.05%</wd>

<space/>

<wd l="4421" t="8453" r="5462" b="8654">respectively</wd>

<space/>

</ln>

<ln l="1781" t="8722" r="3418" b="8880" baseLine="8875" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1781" t="8722" r="2040" b="8880">for</wd>

<space/>

<wd l="2088" t="8722" r="2357" b="8880">the</wd>

<space/>

<wd l="2410" t="8746" r="2717" b="8880">test</wd>

<space/>

<wd l="2774" t="8722" r="3418" b="8880">dataset.</wd>

</ln>

</para>

<para l="1454" t="9221" r="3091" b="9394" alignment="left" spaceBefore="240" lsp="exactly" lspExact="269" language="en">

<ln l="1454" t="9221" r="3091" b="9394" baseLine="9384" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="10">

<wd l="1454" t="9221" r="1550" b="9389">1</wd>

<space/>

<wd l="1805" t="9221" r="3091" b="9394">Introduction</wd>

</ln>

</para>

<para l="1440" t="9672" r="5813" b="14477" alignment="justified" spaceBefore="167" spaceAfter="129" lsp="exactly" lspExact="271" language="en">

<ln l="1440" t="9672" r="5794" b="9874" baseLine="9821" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="1440" t="9672" r="2069" b="9830">Twitter</wd>

<space/>

<wd l="2112" t="9672" r="2395" b="9830">has</wd>

<space/>

<wd l="2462" t="9725" r="2842" b="9830">seen</wd>

<space/>

<wd l="2894" t="9725" r="2990" b="9830">a</wd>

<space/>

<wd l="3034" t="9672" r="4099" b="9874">phenomenal</wd>

<space/>

<wd l="4157" t="9672" r="4762" b="9874">growth</wd>

<space/>

<wd l="4810" t="9672" r="4982" b="9826">in</wd>

<space/>

<wd l="5030" t="9672" r="5294" b="9830">the</wd>

<space/>

<wd l="5347" t="9725" r="5794" b="9830">num-</wd>

</ln>

<ln l="1440" t="9941" r="5803" b="10142" baseLine="10090" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="1440" t="9941" r="1723" b="10099">ber</wd>

<space/>

<wd l="1800" t="9941" r="1987" b="10099">of</wd>

<space/>

<wd l="2050" t="9994" r="2491" b="10099">users</wd>

<space/>

<wd l="2578" t="9941" r="3139" b="10142">during</wd>

<space/>

<wd l="3216" t="9941" r="3480" b="10099">the</wd>

<space/>

<wd l="3557" t="9941" r="3859" b="10099">last</wd>

<space/>

<wd l="3936" t="9941" r="4253" b="10099">few</wd>

<space/>

<wd l="4330" t="9994" r="4834" b="10142">years.</wd>

<space/>

<wd l="4978" t="9946" r="5410" b="10099">Over</wd>

<space/>

<wd l="5482" t="9941" r="5803" b="10099">500</wd>

<space/>

</ln>

<ln l="1440" t="10214" r="5808" b="10416" baseLine="10363" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="1440" t="10214" r="2074" b="10373">million</wd>

<space/>

<wd l="2131" t="10267" r="2501" b="10373">user</wd>

<space/>

<wd l="2558" t="10238" r="3312" b="10373">accounts</wd>

<space/>

<wd l="3379" t="10214" r="3782" b="10373">have</wd>

<space/>

<wd l="3845" t="10214" r="4258" b="10373">been</wd>

<space/>

<wd l="4320" t="10214" r="5174" b="10416">registered</wd>

<space/>

<wd l="5237" t="10214" r="5626" b="10373">with</wd>

<space/>

<wd l="5683" t="10214" r="5808" b="10373">it</wd>

<space/>

</ln>

<ln l="1440" t="10445" r="5808" b="10685" baseLine="10630">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5"><wd l="1440" t="10483" r="1829" b="10642">with</wd>

<space/>

<wd l="1910" t="10536" r="2506" b="10685">approx</wd>

<space/>

<wd l="2592" t="10488" r="2909" b="10642">302</wd>

<space/>

<wd l="2990" t="10483" r="3624" b="10642">million</wd>

<space/>

<wd l="3701" t="10483" r="4210" b="10642">active</wd>

<space/>

<wd l="4286" t="10536" r="4728" b="10642">users</wd>

<space/>

</run>

<wd l="4829" t="10445" r="4944" b="10642"><run underlined="none" subsuperscript="superscript" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">1</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5"><space/>

<wd l="5088" t="10488" r="5808" b="10642">Amount</wd>

<space/>

</run>

</ln>

<ln l="1445" t="10752" r="5808" b="10954" baseLine="10906" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="1445" t="10752" r="1637" b="10910">of</wd>

<space/>

<wd l="1718" t="10805" r="2088" b="10910">user</wd>

<space/>

<wd l="2189" t="10752" r="3034" b="10954">generated</wd>

<space/>

<wd l="3134" t="10776" r="3850" b="10910">contents</wd>

<space/>

<wd l="3960" t="10805" r="4339" b="10910">over</wd>

<space/>

<wd l="4435" t="10752" r="4699" b="10910">the</wd>

<space/>

<wd l="4800" t="10752" r="5160" b="10910">web</wd>

<space/>

<wd l="5261" t="10752" r="5808" b="10910">would</wd>

<space/>

</ln>

<ln l="1440" t="11026" r="5808" b="11227" baseLine="11174" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="1440" t="11026" r="1646" b="11184">be</wd>

<space/>

<wd l="1723" t="11026" r="2698" b="11227">unarguably</wd>

<space/>

<wd l="2779" t="11078" r="3629" b="11184">enormous</wd>

<space/>

<wd l="3715" t="11026" r="3970" b="11184">i.e.</wd>

<space/>

<wd l="4118" t="11026" r="4694" b="11184">almost</wd>

<space/>

<wd l="4776" t="11026" r="5093" b="11184">500</wd>

<space/>

<wd l="5174" t="11026" r="5808" b="11184">million</wd>

<space/>

</ln>

<ln l="1440" t="11261" r="5813" b="11496" baseLine="11446">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5"><wd l="1440" t="11318" r="1987" b="11453">tweets</wd>

<space/>

<wd l="2083" t="11347" r="2362" b="11496">per</wd>

<space/>

<wd l="2448" t="11294" r="2755" b="11496">day</wd>

<space/>

</run>

<wd l="2846" t="11261" r="2976" b="11453"><run underlined="none" subsuperscript="superscript" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">2</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5"><space/>

<wd l="3139" t="11294" r="3480" b="11453">The</wd>

<space/>

<wd l="3562" t="11294" r="3893" b="11453">fact</wd>

<space/>

<wd l="3974" t="11294" r="4301" b="11453">that</wd>

<space/>

<wd l="4382" t="11294" r="5011" b="11453">Twitter</wd>

<space/>

<wd l="5098" t="11294" r="5458" b="11453">data</wd>

<space/>

</run>

<wd l="5549" t="11299" r="5813" b="11491"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">or</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5"><space/>

</run>

</ln>

<ln l="1445" t="11568" r="5794" b="11770" baseLine="11717">

<wd l="1445" t="11573" r="2050" b="11765"><run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">tweets</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">)</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5"><space/>

<wd l="2117" t="11621" r="2381" b="11726">are</wd>

<space/>

<wd l="2438" t="11568" r="3197" b="11770">typically</wd>

<space/>

<wd l="3254" t="11568" r="3725" b="11770">noisy</wd>

<space/>

<wd l="3787" t="11568" r="4099" b="11726">and</wd>

<space/>

<wd l="4157" t="11568" r="5246" b="11726">unstructured</wd>

<space/>

<wd l="5299" t="11568" r="5472" b="11722">in</wd>

<space/>

<wd l="5525" t="11621" r="5794" b="11726">na-</wd>

</run>

</ln>

<ln l="1440" t="11837" r="5803" b="12038" baseLine="11990" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="1440" t="11861" r="1781" b="11995">ture</wd>

<space/>

<wd l="1886" t="11890" r="2146" b="11995">are</wd>

<space/>

<wd l="2256" t="11837" r="2563" b="11995">due</wd>

<space/>

<wd l="2669" t="11861" r="2832" b="11995">to</wd>

<space/>

<wd l="2947" t="11837" r="3547" b="11995">several</wd>

<space/>

<wd l="3653" t="11837" r="4738" b="12038">grammatical</wd>

<space/>

<wd l="4848" t="11842" r="5011" b="11995">&amp;</wd>

<space/>

<wd l="5126" t="11837" r="5803" b="12038">spelling</wd>

<space/>

</ln>

<ln l="1440" t="12110" r="5803" b="12307" baseLine="12259" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="1440" t="12110" r="2194" b="12269">mistakes</wd>

<space/>

<wd l="2256" t="12110" r="2381" b="12269">it</wd>

<space/>

<wd l="2434" t="12110" r="3115" b="12269">contain.</wd>

<space/>

<wd l="3197" t="12110" r="3533" b="12269">The</wd>

<space/>

<wd l="3600" t="12110" r="3926" b="12269">size</wd>

<space/>

<wd l="3984" t="12110" r="4834" b="12269">limitation</wd>

<space/>

<wd l="4896" t="12110" r="5803" b="12307">(constitute</wd>

<space/>

</ln>

<ln l="1440" t="12379" r="5808" b="12581" baseLine="12533" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="1440" t="12403" r="1824" b="12581">upto</wd>

<space/>

<wd l="1901" t="12384" r="2198" b="12538">140</wd>

<space/>

<wd l="2256" t="12379" r="3130" b="12538">characters</wd>

<space/>

<wd l="3192" t="12379" r="3638" b="12581">only)</wd>

<space/>

<wd l="3696" t="12379" r="3835" b="12538">is</wd>

<space/>

<wd l="3888" t="12379" r="4157" b="12538">the</wd>

<space/>

<wd l="4210" t="12379" r="4862" b="12538">another</wd>

<space/>

<wd l="4910" t="12379" r="5808" b="12581">prominent</wd>

<space/>

</ln>

<ln l="1440" t="12653" r="5808" b="12811" baseLine="12802" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="1440" t="12706" r="2054" b="12811">reason.</wd>

<space/>

<wd l="2150" t="12658" r="2285" b="12811">It</wd>

<space/>

<wd l="2352" t="12653" r="3067" b="12811">confines</wd>

<space/>

<wd l="3139" t="12706" r="3235" b="12811">a</wd>

<space/>

<wd l="3293" t="12706" r="3662" b="12811">user</wd>

<space/>

<wd l="3720" t="12677" r="3883" b="12811">to</wd>

<space/>

<wd l="3955" t="12653" r="4498" b="12811">devise</wd>

<space/>

<wd l="4565" t="12653" r="5309" b="12811">different</wd>

<space/>

<wd l="5381" t="12653" r="5808" b="12811">short</wd>

<space/>

</ln>

<ln l="1440" t="12922" r="5808" b="13123" baseLine="13070">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5"><wd l="1440" t="12922" r="1939" b="13080">forms</wd>

<space/>

</run>

<wd l="2021" t="12926" r="2371" b="13123"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">e.g.</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5"><space/>

<wd l="2506" t="12926" r="2645" b="13080">‘c</wd>

<space/>

<wd l="2707" t="12974" r="2808" b="13080">u</wd>

<space/>

<wd l="2875" t="12922" r="3144" b="13080">ltr.’</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5"><wd l="3235" t="12922" r="3494" b="13080">for</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5"><wd l="3581" t="12926" r="3893" b="13080">‘see</wd>

<space/>

<wd l="3950" t="12974" r="4272" b="13123">you</wd>

<space/>

</run>

<wd l="4339" t="12922" r="4882" b="13118"><run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">later.’</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">)</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5"><space/>

<wd l="4982" t="12922" r="5170" b="13080">of</wd>

<space/>

<wd l="5222" t="12974" r="5318" b="13080">a</wd>

<space/>

<wd l="5376" t="12922" r="5808" b="13080">valid</wd>

<space/>

</run>

</ln>

<ln l="1440" t="13195" r="5808" b="13397" baseLine="13344" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="1440" t="13195" r="1930" b="13354">word.</wd>

<space/>

<wd l="2088" t="13195" r="3115" b="13397">Interpreting</wd>

<space/>

<wd l="3206" t="13195" r="3600" b="13354">such</wd>

<space/>

<wd l="3677" t="13195" r="4181" b="13354">forms</wd>

<space/>

<wd l="4267" t="13248" r="4642" b="13397">may</wd>

<space/>

<wd l="4723" t="13195" r="4930" b="13354">be</wd>

<space/>

<wd l="5016" t="13248" r="5213" b="13354">an</wd>

<space/>

<wd l="5299" t="13195" r="5808" b="13354">easier</wd>

<space/>

</ln>

<ln l="1440" t="13464" r="5808" b="13666" baseLine="13613" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="1440" t="13464" r="1795" b="13622">task</wd>

<space/>

<wd l="1834" t="13464" r="2093" b="13622">for</wd>

<space/>

<wd l="2136" t="13517" r="2232" b="13622">a</wd>

<space/>

<wd l="2270" t="13464" r="2861" b="13622">human</wd>

<space/>

<wd l="2904" t="13464" r="3389" b="13666">being</wd>

<space/>

<wd l="3432" t="13464" r="3749" b="13651">but,</wd>

<space/>

<wd l="3806" t="13464" r="3946" b="13622">is</wd>

<space/>

<wd l="3994" t="13517" r="4378" b="13666">very</wd>

<space/>

<wd l="4426" t="13464" r="5107" b="13622">difficult</wd>

<space/>

<wd l="5150" t="13488" r="5314" b="13622">to</wd>

<space/>

<wd l="5362" t="13464" r="5808" b="13622">build</wd>

<space/>

</ln>

<ln l="1445" t="13733" r="5808" b="13934" baseLine="13886" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="1445" t="13786" r="1642" b="13891">an</wd>

<space/>

<wd l="1694" t="13757" r="2414" b="13891">accurate</wd>

<space/>

<wd l="2467" t="13757" r="3067" b="13934">system</wd>

<space/>

<wd l="3106" t="13733" r="3365" b="13891">for</wd>

<space/>

<wd l="3413" t="13733" r="4042" b="13934">solving</wd>

<space/>

<wd l="4094" t="13786" r="4397" b="13934">any</wd>

<space/>

<wd l="4440" t="13733" r="5174" b="13934">problem</wd>

<space/>

<wd l="5213" t="13733" r="5808" b="13891">related</wd>

<space/>

</ln>

<ln l="1440" t="14006" r="5794" b="14208" baseLine="14155" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="1440" t="14030" r="1608" b="14165">to</wd>

<space/>

<wd l="1694" t="14006" r="2299" b="14165">natural</wd>

<space/>

<wd l="2386" t="14006" r="3173" b="14208">language</wd>

<space/>

<wd l="3259" t="14006" r="4190" b="14208">processing</wd>

<space/>

<wd l="4286" t="14011" r="4877" b="14203">(NLP).</wd>

<space/>

<wd l="4973" t="14011" r="5194" b="14165">At</wd>

<space/>

<wd l="5280" t="14006" r="5794" b="14194">times,</wd>

<space/>

</ln>

<ln l="1440" t="14275" r="5803" b="14477" baseLine="14429" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="1440" t="14328" r="1810" b="14434">user</wd>

<space/>

<wd l="1872" t="14299" r="2227" b="14477">puts</wd>

<space/>

<wd l="2304" t="14299" r="2736" b="14434">extra</wd>

<space/>

<wd l="2803" t="14275" r="3605" b="14477">emphasis</wd>

<space/>

<wd l="3682" t="14275" r="3893" b="14477">by</wd>

<space/>

<wd l="3974" t="14275" r="5803" b="14477">stretching/elongating</wd>

</ln>

</para>

<rulerline l="1435" t="14630" r="2640" b="14630" type="single" width="10" color="000000"/>

<para l="1704" t="14683" r="4330" b="14885" alignment="left" li="216" spaceBefore="59" lsp="exactly" lspExact="206" language="en">

<ln l="1704" t="14683" r="4330" b="14885" baseLine="14839">

<wd l="1704" t="14683" r="4330" b="14885"><run underlined="none" subsuperscript="superscript" fontSize="600" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">1</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">http://en.wikipedia.org/wiki/Twitter</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

<para l="1440" t="14904" r="5530" b="15302" alignment="left" ri="288" spaceBefore="14" fli="216" lsp="exactly" lspExact="202" language="en">

<ln l="1694" t="14904" r="5530" b="15106" baseLine="15056">

<wd l="1694" t="14904" r="5530" b="15106"><run underlined="none" subsuperscript="superscript" fontSize="600" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">2</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">http://www.cnet.com/news/report-twitter-hits-half-a-</run>

</wd>

</ln>

<ln l="1440" t="15134" r="2938" b="15302" baseLine="15259" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1440" t="15134" r="2938" b="15302">billion-tweets-a-day/</wd>

</ln>

</para>

</column>

<column l="6144" t="4092" r="10531" b="15317">

<para l="6144" t="4157" r="10507" b="4901" alignment="justified" spaceBefore="4" lsp="exactly" lspExact="271" language="en">

<ln l="6149" t="4157" r="10502" b="4358" baseLine="4306" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="4210" r="6245" b="4315">a</wd>

<space/>

<wd l="6312" t="4157" r="6744" b="4315">valid</wd>

<space/>

<wd l="6811" t="4157" r="7258" b="4315">word</wd>

<space/>

<wd l="7325" t="4181" r="7488" b="4315">to</wd>

<space/>

<wd l="7565" t="4210" r="8203" b="4358">express</wd>

<space/>

<wd l="8280" t="4157" r="8683" b="4315">their</wd>

<space/>

<wd l="8750" t="4157" r="9480" b="4358">feelings.</wd>

<space/>

<wd l="9600" t="4162" r="9907" b="4315">For</wd>

<space/>

<wd l="9974" t="4210" r="10502" b="4315">exam-</wd>

</ln>

<ln l="6144" t="4426" r="10507" b="4627" baseLine="4579">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="6144" t="4426" r="6456" b="4627">ple,</wd>

<space/>

<wd l="6557" t="4426" r="6926" b="4627">they</wd>

<space/>

<wd l="7018" t="4426" r="7459" b="4584">often</wd>

<space/>

<wd l="7541" t="4478" r="7829" b="4584">use</wd>

<space/>

<wd l="7915" t="4426" r="8362" b="4584">word</wd>

<space/>

<wd l="8443" t="4426" r="8765" b="4584">like</wd>

<space/>

</run>

<wd l="8875" t="4430" r="9696" b="4627"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">‘</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">yeeessss</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">’</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><space/>

<wd l="9802" t="4450" r="9970" b="4584">to</wd>

<space/>

<wd l="10066" t="4426" r="10507" b="4584">show</wd>

<space/>

</run>

</ln>

<ln l="6144" t="4699" r="10498" b="4901" baseLine="4848">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="6144" t="4699" r="6552" b="4858">their</wd>

<space/>

<wd l="6595" t="4699" r="7498" b="4901">happiness,</wd>

<space/>

<wd l="7560" t="4699" r="8098" b="4858">which</wd>

<space/>

<wd l="8146" t="4699" r="8285" b="4858">is</wd>

<space/>

<wd l="8347" t="4752" r="8443" b="4858">a</wd>

<space/>

<wd l="8496" t="4699" r="9278" b="4858">stretched</wd>

<space/>

<wd l="9326" t="4699" r="9754" b="4858">form</wd>

<space/>

<wd l="9806" t="4699" r="9998" b="4858">of</wd>

<space/>

</run>

<wd l="10056" t="4704" r="10498" b="4901"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">‘</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">yes</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">’.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><nl orig="true"/>

</run>

</ln>

</para>

<para l="6144" t="5078" r="10526" b="15307" alignment="justified" spaceBefore="110" fli="216" lsp="exactly" lspExact="270" language="en">

<ln l="6365" t="5078" r="10512" b="5280" baseLine="5232" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6365" t="5078" r="7627" b="5237">Normalization</wd>

<space/>

<wd l="7685" t="5078" r="7877" b="5237">of</wd>

<space/>

<wd l="7920" t="5078" r="8386" b="5280">noisy</wd>

<space/>

<wd l="8448" t="5102" r="8774" b="5237">text</wd>

<space/>

<wd l="8827" t="5078" r="8966" b="5237">is</wd>

<space/>

<wd l="9034" t="5131" r="9230" b="5237">an</wd>

<space/>

<wd l="9293" t="5078" r="10142" b="5280">important</wd>

<space/>

<wd l="10200" t="5078" r="10512" b="5237">and</wd>

<space/>

</ln>

<ln l="6144" t="5352" r="10502" b="5554" baseLine="5501" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="5405" r="6989" b="5554">necessary</wd>

<space/>

<wd l="7066" t="5352" r="8347" b="5554">pre-processing</wd>

<space/>

<wd l="8419" t="5352" r="8774" b="5510">task</wd>

<space/>

<wd l="8842" t="5352" r="9101" b="5510">for</wd>

<space/>

<wd l="9168" t="5352" r="9888" b="5554">building</wd>

<space/>

<wd l="9970" t="5352" r="10502" b="5510">differ-</wd>

</ln>

<ln l="6149" t="5621" r="10512" b="5822" baseLine="5774" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="5645" r="6413" b="5779">ent</wd>

<space/>

<wd l="6533" t="5621" r="7574" b="5822">applications</wd>

<space/>

<wd l="7699" t="5621" r="8294" b="5779">related</wd>

<space/>

<wd l="8410" t="5645" r="8578" b="5779">to</wd>

<space/>

<wd l="8698" t="5645" r="9024" b="5779">text</wd>

<space/>

<wd l="9134" t="5621" r="10114" b="5822">processing.</wd>

<space/>

<wd l="10378" t="5626" r="10512" b="5779">It</wd>

<space/>

</ln>

<ln l="6144" t="5894" r="10512" b="6096" baseLine="6043" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="5894" r="6283" b="6053">is</wd>

<space/>

<wd l="6408" t="5918" r="6912" b="6096">pretty</wd>

<space/>

<wd l="7042" t="5894" r="7714" b="6053">obvious</wd>

<space/>

<wd l="7838" t="5894" r="8266" b="6053">from</wd>

<space/>

<wd l="8381" t="5894" r="9010" b="6053">various</wd>

<space/>

<wd l="9144" t="5894" r="9734" b="6053">studies</wd>

<space/>

<wd l="9869" t="5894" r="10234" b="6091">(Liu</wd>

<space/>

<wd l="10358" t="5918" r="10512" b="6053">et</wd>

<space/>

</ln>

<ln l="6149" t="6163" r="10526" b="6365" baseLine="6317" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="6163" r="6403" b="6350">al.,</wd>

<space/>

<wd l="6528" t="6168" r="7008" b="6350">2011;</wd>

<space/>

<wd l="7147" t="6168" r="7694" b="6322">Foster</wd>

<space/>

<wd l="7795" t="6187" r="7949" b="6322">et</wd>

<space/>

<wd l="8054" t="6163" r="8304" b="6350">al.,</wd>

<space/>

<wd l="8434" t="6168" r="8928" b="6360">2011)</wd>

<space/>

<wd l="9038" t="6163" r="9365" b="6322">that</wd>

<space/>

<wd l="9466" t="6216" r="10229" b="6365">presence</wd>

<space/>

<wd l="10334" t="6163" r="10526" b="6322">of</wd>

<space/>

</ln>

<ln l="6144" t="6437" r="10502" b="6638" baseLine="6586" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="6437" r="6614" b="6638">noisy</wd>

<space/>

<wd l="6701" t="6461" r="7104" b="6595">texts</wd>

<space/>

<wd l="7190" t="6437" r="7738" b="6595">makes</wd>

<space/>

<wd l="7834" t="6490" r="8141" b="6638">any</wd>

<space/>

<wd l="8227" t="6437" r="8827" b="6595">natural</wd>

<space/>

<wd l="8914" t="6437" r="9701" b="6638">language</wd>

<space/>

<wd l="9782" t="6490" r="10502" b="6638">process-</wd>

</ln>

<ln l="6144" t="6706" r="10502" b="6907" baseLine="6854" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="6706" r="6422" b="6907">ing</wd>

<space/>

<wd l="6514" t="6710" r="7051" b="6902">(NLP)</wd>

<space/>

<wd l="7142" t="6706" r="7493" b="6864">task</wd>

<space/>

<wd l="7570" t="6758" r="7954" b="6907">very</wd>

<space/>

<wd l="8035" t="6706" r="8654" b="6864">tedious</wd>

<space/>

<wd l="8746" t="6730" r="8909" b="6864">to</wd>

<space/>

<wd l="8995" t="6706" r="9648" b="6864">achieve</wd>

<space/>

<wd l="9734" t="6706" r="10166" b="6907">good</wd>

<space/>

<wd l="10248" t="6758" r="10502" b="6864">ac-</wd>

</ln>

<ln l="6149" t="6974" r="10502" b="7176" baseLine="7128" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="7027" r="6720" b="7176">curacy</wd>

<space/>

<wd l="6797" t="6974" r="7344" b="7133">levels.</wd>

<space/>

<wd l="7483" t="6974" r="7824" b="7133">The</wd>

<space/>

<wd l="7901" t="6974" r="8270" b="7176">goal</wd>

<space/>

<wd l="8352" t="6974" r="8544" b="7133">of</wd>

<space/>

<wd l="8606" t="6974" r="9821" b="7133">normalization</wd>

<space/>

<wd l="9893" t="6974" r="10032" b="7133">is</wd>

<space/>

<wd l="10114" t="6998" r="10502" b="7133">two-</wd>

</ln>

<ln l="6144" t="7248" r="10502" b="7445" baseLine="7397" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="7248" r="6542" b="7435">fold,</wd>

<space/>

<wd l="6643" t="7248" r="6898" b="7406">i.e.</wd>

<space/>

<wd l="7066" t="7253" r="7219" b="7445">a)</wd>

<space/>

<wd l="7310" t="7248" r="8467" b="7406">identification</wd>

<space/>

<wd l="8554" t="7248" r="8741" b="7406">of</wd>

<space/>

<wd l="8818" t="7248" r="9725" b="7406">candidates</wd>

<space/>

<wd l="9816" t="7248" r="10075" b="7406">for</wd>

<space/>

<wd l="10152" t="7301" r="10502" b="7406">nor-</wd>

</ln>

<ln l="6144" t="7517" r="10502" b="7718" baseLine="7670" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="7517" r="7070" b="7675">malization</wd>

<space/>

<wd l="7138" t="7517" r="7450" b="7675">and</wd>

<space/>

<wd l="7517" t="7517" r="7690" b="7714">b)</wd>

<space/>

<wd l="7771" t="7517" r="8683" b="7718">converting</wd>

<space/>

<wd l="8755" t="7517" r="9019" b="7675">the</wd>

<space/>

<wd l="9091" t="7517" r="9922" b="7675">candidate</wd>

<space/>

<wd l="9994" t="7517" r="10502" b="7675">word-</wd>

</ln>

<ln l="6144" t="7790" r="10502" b="7992" baseLine="7939" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="7790" r="6648" b="7949">forms</wd>

<space/>

<wd l="6744" t="7814" r="6912" b="7949">to</wd>

<space/>

<wd l="7003" t="7790" r="7272" b="7949">the</wd>

<space/>

<wd l="7363" t="7790" r="8342" b="7949">normalized</wd>

<space/>

<wd l="8434" t="7790" r="8899" b="7949">form.</wd>

<space/>

<wd l="9086" t="7790" r="9677" b="7949">Unlike</wd>

<space/>

<wd l="9768" t="7790" r="10032" b="7949">the</wd>

<space/>

<wd l="10128" t="7843" r="10502" b="7992">gen-</wd>

</ln>

<ln l="6149" t="8059" r="10502" b="8261" baseLine="8213" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="8059" r="6470" b="8218">eral</wd>

<space/>

<wd l="6533" t="8059" r="7834" b="8218">well-formatted</wd>

<space/>

<wd l="7896" t="8112" r="8520" b="8261">corpus,</wd>

<space/>

<wd l="8592" t="8059" r="8918" b="8218">like</wd>

<space/>

<wd l="8981" t="8059" r="9854" b="8246">newswire,</wd>

<space/>

<wd l="9926" t="8059" r="10051" b="8218">it</wd>

<space/>

<wd l="10114" t="8059" r="10502" b="8218">does</wd>

<space/>

</ln>

<ln l="6144" t="8333" r="10502" b="8534" baseLine="8482" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="8357" r="6427" b="8491">not</wd>

<space/>

<wd l="6514" t="8333" r="7104" b="8534">always</wd>

<space/>

<wd l="7205" t="8333" r="7843" b="8491">contain</wd>

<space/>

<wd l="7930" t="8333" r="8395" b="8534">noisy</wd>

<space/>

<wd l="8486" t="8357" r="8856" b="8491">text.</wd>

<space/>

<wd l="9029" t="8338" r="9240" b="8491">Its</wd>

<space/>

<wd l="9336" t="8333" r="9773" b="8491">main</wd>

<space/>

<wd l="9864" t="8386" r="10502" b="8491">sources</wd>

<space/>

</ln>

<ln l="6149" t="8602" r="10512" b="8803" baseLine="8755" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="8654" r="6413" b="8760">are</wd>

<space/>

<wd l="6470" t="8602" r="7253" b="8803">normally</wd>

<space/>

<wd l="7315" t="8602" r="7776" b="8760">those</wd>

<space/>

<wd l="7838" t="8602" r="8664" b="8803">platforms</wd>

<space/>

<wd l="8736" t="8654" r="8952" b="8760">on</wd>

<space/>

<wd l="9010" t="8602" r="9542" b="8760">which</wd>

<space/>

<wd l="9600" t="8654" r="10042" b="8760">users</wd>

<space/>

<wd l="10109" t="8602" r="10512" b="8760">have</wd>

<space/>

</ln>

<ln l="6149" t="8875" r="10502" b="9077" baseLine="9024" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="8875" r="6946" b="9077">complete</wd>

<space/>

<wd l="7018" t="8875" r="7747" b="9034">freedom</wd>

<space/>

<wd l="7819" t="8899" r="7987" b="9034">to</wd>

<space/>

<wd l="8064" t="8928" r="8702" b="9077">express</wd>

<space/>

<wd l="8784" t="8875" r="9797" b="9034">themselves.</wd>

<space/>

<wd l="9931" t="8875" r="10502" b="9034">There-</wd>

</ln>

<ln l="6144" t="9144" r="10517" b="9346" baseLine="9298" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="9144" r="6542" b="9331">fore,</wd>

<space/>

<wd l="6638" t="9197" r="7008" b="9302">user</wd>

<space/>

<wd l="7090" t="9144" r="7934" b="9346">generated</wd>

<space/>

<wd l="8011" t="9168" r="8563" b="9302">tweets</wd>

<space/>

<wd l="8654" t="9197" r="8914" b="9302">are</wd>

<space/>

<wd l="9000" t="9197" r="9312" b="9302">one</wd>

<space/>

<wd l="9398" t="9144" r="9586" b="9302">of</wd>

<space/>

<wd l="9653" t="9144" r="9922" b="9302">the</wd>

<space/>

<wd l="10003" t="9144" r="10517" b="9346">major</wd>

<space/>

</ln>

<ln l="6154" t="9418" r="10502" b="9619" baseLine="9566" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6154" t="9470" r="6792" b="9576">sources</wd>

<space/>

<wd l="6864" t="9418" r="7051" b="9576">of</wd>

<space/>

<wd l="7094" t="9418" r="7565" b="9619">noisy</wd>

<space/>

<wd l="7627" t="9442" r="8078" b="9576">texts.</wd>

<space/>

<wd l="8165" t="9422" r="8347" b="9571">In</wd>

<space/>

<wd l="8405" t="9418" r="8669" b="9576">the</wd>

<space/>

<wd l="8726" t="9418" r="9034" b="9576">last</wd>

<space/>

<wd l="9091" t="9418" r="9749" b="9619">couples</wd>

<space/>

<wd l="9816" t="9418" r="10008" b="9576">of</wd>

<space/>

<wd l="10051" t="9470" r="10502" b="9619">years</wd>

<space/>

</ln>

<ln l="6144" t="9686" r="10507" b="9888" baseLine="9835" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="9686" r="7118" b="9845">researchers</wd>

<space/>

<wd l="7181" t="9739" r="7709" b="9845">across</wd>

<space/>

<wd l="7766" t="9686" r="8698" b="9845">worldwide</wd>

<space/>

<wd l="8750" t="9739" r="9010" b="9845">are</wd>

<space/>

<wd l="9062" t="9686" r="9734" b="9888">actively</wd>

<space/>

<wd l="9787" t="9686" r="10507" b="9888">working</wd>

<space/>

</ln>

<ln l="6144" t="9960" r="10517" b="10162" baseLine="10109" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="9960" r="6403" b="10118">for</wd>

<space/>

<wd l="6470" t="9960" r="6739" b="10118">the</wd>

<space/>

<wd l="6811" t="9960" r="8021" b="10118">normalization</wd>

<space/>

<wd l="8098" t="9960" r="8290" b="10118">of</wd>

<space/>

<wd l="8347" t="9960" r="8813" b="10162">noisy</wd>

<space/>

<wd l="8894" t="9984" r="9610" b="10118">contents</wd>

<space/>

<wd l="9691" t="9960" r="9883" b="10118">of</wd>

<space/>

<wd l="9941" t="9960" r="10517" b="10118">twitter</wd>

<space/>

</ln>

<ln l="6154" t="10229" r="10507" b="10430" baseLine="10378" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6154" t="10234" r="6576" b="10426">(Han</wd>

<space/>

<wd l="6658" t="10229" r="6970" b="10387">and</wd>

<space/>

<wd l="7042" t="10229" r="7824" b="10416">Baldwin,</wd>

<space/>

<wd l="7915" t="10234" r="8395" b="10416">2011;</wd>

<space/>

<wd l="8486" t="10229" r="8784" b="10387">Liu</wd>

<space/>

<wd l="8866" t="10253" r="9019" b="10387">et</wd>

<space/>

<wd l="9091" t="10229" r="9346" b="10416">al.,</wd>

<space/>

<wd l="9437" t="10234" r="9917" b="10416">2012;</wd>

<space/>

<wd l="10008" t="10234" r="10507" b="10430">Wang</wd>

<space/>

</ln>

<ln l="6149" t="10498" r="10502" b="10699" baseLine="10651" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="10498" r="6461" b="10656">and</wd>

<space/>

<wd l="6557" t="10502" r="6869" b="10699">Ng,</wd>

<space/>

<wd l="6994" t="10502" r="7474" b="10685">2013;</wd>

<space/>

<wd l="7608" t="10502" r="8069" b="10656">Porta</wd>

<space/>

<wd l="8170" t="10498" r="8482" b="10656">and</wd>

<space/>

<wd l="8582" t="10498" r="9264" b="10685">Sancho,</wd>

<space/>

<wd l="9389" t="10502" r="9869" b="10685">2013;</wd>

<space/>

<wd l="10008" t="10498" r="10502" b="10656">Chru-</wd>

</ln>

<ln l="6144" t="10771" r="10502" b="10973" baseLine="10920" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="10771" r="6552" b="10973">pala,</wd>

<space/>

<wd l="6653" t="10776" r="7200" b="10968">2014).</wd>

<space/>

<wd l="7344" t="10776" r="7531" b="10925">In</wd>

<space/>

<wd l="7613" t="10776" r="8035" b="10968">(Han</wd>

<space/>

<wd l="8122" t="10771" r="8434" b="10930">and</wd>

<space/>

<wd l="8510" t="10771" r="9293" b="10958">Baldwin,</wd>

<space/>

<wd l="9394" t="10776" r="9941" b="10968">2011),</wd>

<space/>

<wd l="10037" t="10824" r="10133" b="10930">a</wd>

<space/>

<wd l="10210" t="10771" r="10502" b="10925">lin-</wd>

</ln>

<ln l="6149" t="11040" r="10502" b="11242" baseLine="11194" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="11093" r="6418" b="11198">ear</wd>

<space/>

<wd l="6475" t="11045" r="7162" b="11242">Support</wd>

<space/>

<wd l="7219" t="11045" r="7790" b="11198">Vector</wd>

<space/>

<wd l="7843" t="11040" r="8606" b="11198">Machine</wd>

<space/>

<wd l="8674" t="11045" r="9274" b="11237">(SVM)</wd>

<space/>

<wd l="9346" t="11040" r="10118" b="11198">classifier</wd>

<space/>

<wd l="10176" t="11093" r="10502" b="11198">was</wd>

<space/>

</ln>

<ln l="6144" t="11314" r="10512" b="11515" baseLine="11462" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="11314" r="6754" b="11472">trained</wd>

<space/>

<wd l="6840" t="11314" r="7099" b="11472">for</wd>

<space/>

<wd l="7190" t="11314" r="7982" b="11515">detecting</wd>

<space/>

<wd l="8074" t="11314" r="8962" b="11472">ill-formed</wd>

<space/>

<wd l="9048" t="11314" r="9624" b="11501">words,</wd>

<space/>

<wd l="9734" t="11314" r="10046" b="11472">and</wd>

<space/>

<wd l="10138" t="11314" r="10512" b="11472">then</wd>

<space/>

</ln>

<ln l="6144" t="11582" r="10502" b="11784" baseLine="11736" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="11582" r="7056" b="11784">performed</wd>

<space/>

<wd l="7114" t="11582" r="8328" b="11741">normalization</wd>

<space/>

<wd l="8390" t="11582" r="8885" b="11741">based</wd>

<space/>

<wd l="8952" t="11635" r="9168" b="11741">on</wd>

<space/>

<wd l="9226" t="11582" r="10502" b="11784">morphophone-</wd>

</ln>

<ln l="6144" t="11856" r="10512" b="12058" baseLine="12005" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="11856" r="6466" b="12014">mic</wd>

<space/>

<wd l="6542" t="11856" r="7402" b="12058">similarity.</wd>

<space/>

<wd l="7502" t="11856" r="8534" b="12058">Application</wd>

<space/>

<wd l="8597" t="11856" r="8789" b="12014">of</wd>

<space/>

<wd l="8842" t="11856" r="9163" b="12014">edit</wd>

<space/>

<wd l="9230" t="11856" r="10128" b="12058">operations</wd>

<space/>

<wd l="10200" t="11856" r="10512" b="12014">and</wd>

<space/>

</ln>

<ln l="6144" t="12125" r="10502" b="12326" baseLine="12278" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="12149" r="6936" b="12283">recurrent</wd>

<space/>

<wd l="6974" t="12125" r="7517" b="12283">neural</wd>

<space/>

<wd l="7565" t="12125" r="8530" b="12326">embedding</wd>

<space/>

<wd l="8578" t="12178" r="8875" b="12283">can</wd>

<space/>

<wd l="8918" t="12125" r="9125" b="12283">be</wd>

<space/>

<wd l="9168" t="12125" r="9677" b="12283">found</wd>

<space/>

<wd l="9715" t="12125" r="9888" b="12278">in</wd>

<space/>

<wd l="9941" t="12125" r="10502" b="12322">(Chru-</wd>

</ln>

<ln l="6144" t="12398" r="10512" b="12600" baseLine="12547" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="12398" r="6552" b="12600">pala,</wd>

<space/>

<wd l="6643" t="12403" r="7138" b="12595">2014)</wd>

<space/>

<wd l="7214" t="12398" r="7474" b="12557">for</wd>

<space/>

<wd l="7541" t="12422" r="7867" b="12557">text</wd>

<space/>

<wd l="7934" t="12398" r="9187" b="12557">normalization.</wd>

<space/>

<wd l="9312" t="12398" r="9792" b="12557">Their</wd>

<space/>

<wd l="9854" t="12398" r="10512" b="12557">method</wd>

<space/>

</ln>

<ln l="6144" t="12667" r="10502" b="12869" baseLine="12816" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="12667" r="6658" b="12826">learns</wd>

<space/>

<wd l="6778" t="12720" r="7565" b="12869">sequence</wd>

<space/>

<wd l="7670" t="12667" r="7862" b="12826">of</wd>

<space/>

<wd l="7949" t="12667" r="8275" b="12826">edit</wd>

<space/>

<wd l="8376" t="12667" r="9274" b="12869">operations</wd>

<space/>

<wd l="9379" t="12667" r="9850" b="12869">using</wd>

<space/>

<wd l="9960" t="12667" r="10502" b="12826">condi-</wd>

</ln>

<ln l="6144" t="12941" r="10507" b="13138" baseLine="13090" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="12941" r="6638" b="13099">tional</wd>

<space/>

<wd l="6710" t="12941" r="7382" b="13099">random</wd>

<space/>

<wd l="7454" t="12941" r="7838" b="13099">field</wd>

<space/>

<wd l="7915" t="12946" r="8506" b="13138">(CRF).</wd>

<space/>

<wd l="8587" t="12946" r="8770" b="13094">In</wd>

<space/>

<wd l="8842" t="12941" r="9494" b="13099">another</wd>

<space/>

<wd l="9562" t="12941" r="10051" b="13128">work,</wd>

<space/>

<wd l="10147" t="12941" r="10507" b="13138">(Liu</wd>

<space/>

</ln>

<ln l="6149" t="13210" r="10502" b="13411" baseLine="13358" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="13234" r="6307" b="13368">et</wd>

<space/>

<wd l="6384" t="13210" r="6634" b="13397">al.,</wd>

<space/>

<wd l="6730" t="13214" r="7224" b="13406">2012)</wd>

<space/>

<wd l="7310" t="13210" r="8357" b="13411">investigated</wd>

<space/>

<wd l="8429" t="13210" r="8693" b="13368">the</wd>

<space/>

<wd l="8770" t="13210" r="9365" b="13368">human</wd>

<space/>

<wd l="9442" t="13210" r="10502" b="13411">perspectives</wd>

<space/>

</ln>

<ln l="6149" t="13478" r="10507" b="13680" baseLine="13632" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="13478" r="6341" b="13637">of</wd>

<space/>

<wd l="6403" t="13478" r="7224" b="13637">enhanced</wd>

<space/>

<wd l="7291" t="13478" r="7747" b="13637">letter</wd>

<space/>

<wd l="7819" t="13478" r="9139" b="13666">transformation,</wd>

<space/>

<wd l="9226" t="13478" r="9744" b="13637">visual</wd>

<space/>

<wd l="9821" t="13478" r="10507" b="13680">priming</wd>

<space/>

</ln>

<ln l="6149" t="13752" r="10502" b="13954" baseLine="13901" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="13752" r="6461" b="13910">and</wd>

<space/>

<wd l="6518" t="13752" r="6782" b="13910">the</wd>

<space/>

<wd l="6845" t="13752" r="7589" b="13954">phonetic</wd>

<space/>

<wd l="7661" t="13752" r="8486" b="13954">similarity</wd>

<space/>

<wd l="8549" t="13752" r="8803" b="13910">for</wd>

<space/>

<wd l="8861" t="13752" r="9125" b="13910">the</wd>

<space/>

<wd l="9182" t="13776" r="9509" b="13910">text</wd>

<space/>

<wd l="9566" t="13752" r="10502" b="13910">normaliza-</wd>

</ln>

<ln l="6144" t="14021" r="10502" b="14179" baseLine="14174" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="14021" r="6528" b="14179">tion.</wd>

<space/>

<wd l="6653" t="14021" r="6994" b="14179">The</wd>

<space/>

<wd l="7061" t="14074" r="7354" b="14179">use</wd>

<space/>

<wd l="7426" t="14021" r="7618" b="14179">of</wd>

<space/>

<wd l="7675" t="14021" r="8150" b="14179">beam</wd>

<space/>

<wd l="8227" t="14021" r="8774" b="14179">search</wd>

<space/>

<wd l="8851" t="14021" r="9538" b="14179">decoder</wd>

<space/>

<wd l="9610" t="14021" r="9922" b="14179">and</wd>

<space/>

<wd l="9994" t="14021" r="10502" b="14179">finite-</wd>

</ln>

<ln l="6154" t="14294" r="10498" b="14496" baseLine="14443" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6154" t="14318" r="6542" b="14453">state</wd>

<space/>

<wd l="6634" t="14294" r="7618" b="14453">transducers</wd>

<space/>

<wd l="7718" t="14347" r="8011" b="14453">can</wd>

<space/>

<wd l="8102" t="14294" r="8309" b="14453">be</wd>

<space/>

<wd l="8405" t="14347" r="8784" b="14453">seen</wd>

<space/>

<wd l="8870" t="14294" r="9038" b="14448">in</wd>

<space/>

<wd l="9134" t="14299" r="9696" b="14496">(Wang</wd>

<space/>

<wd l="9792" t="14294" r="10104" b="14453">and</wd>

<space/>

<wd l="10190" t="14299" r="10498" b="14496">Ng,</wd>

<space/>

</ln>

<ln l="6149" t="14563" r="10502" b="14760" baseLine="14717" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="14568" r="6629" b="14750">2013;</wd>

<space/>

<wd l="6720" t="14568" r="7181" b="14722">Porta</wd>

<space/>

<wd l="7253" t="14563" r="7565" b="14722">and</wd>

<space/>

<wd l="7637" t="14563" r="8318" b="14750">Sancho,</wd>

<space/>

<wd l="8405" t="14568" r="8899" b="14760">2013)</wd>

<space/>

<wd l="8981" t="14563" r="9235" b="14722">for</wd>

<space/>

<wd l="9302" t="14563" r="9566" b="14722">the</wd>

<space/>

<wd l="9638" t="14563" r="10085" b="14722">word</wd>

<space/>

<wd l="10152" t="14616" r="10502" b="14722">nor-</wd>

</ln>

<ln l="6144" t="14837" r="10502" b="15038" baseLine="14986" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="14837" r="7109" b="14995">malization.</wd>

<space/>

<wd l="7186" t="14837" r="7709" b="14995">These</wd>

<space/>

<wd l="7762" t="14837" r="8438" b="15038">existing</wd>

<space/>

<wd l="8491" t="14837" r="9014" b="14995">works</wd>

<space/>

<wd l="9077" t="14890" r="9336" b="14995">are</wd>

<space/>

<wd l="9384" t="14837" r="9883" b="14995">based</wd>

<space/>

<wd l="9936" t="14890" r="10147" b="14995">on</wd>

<space/>

<wd l="10200" t="14837" r="10502" b="14995">dif-</wd>

</ln>

<ln l="6144" t="15106" r="8414" b="15307" baseLine="15259" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="15106" r="6658" b="15264">ferent</wd>

<space/>

<wd l="6720" t="15130" r="7248" b="15307">setups</wd>

<space/>

<wd l="7315" t="15106" r="7627" b="15264">and</wd>

<space/>

<wd l="7685" t="15106" r="8414" b="15264">datasets.</wd>

</ln>

</para>

</column>

</section>

<section l="1435" t="15317" r="10531" b="16480">

<column l="1435" t="15317" r="10531" b="16480">

<para l="5771" t="15787" r="6200" b="15946" alignment="centered" spaceBefore="406" lsp="exactly" lspExact="271" language="en">

<ln l="5837" t="15787" r="6134" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="24">

<wd l="5837" t="15787" r="6134" b="15946">106</wd>

</ln>

</para>

<para l="2827" t="16133" r="9072" b="16469" alignment="centered" spaceBefore="138" lsp="exactly" lspExact="170" language="en">

<ln l="2827" t="16133" r="9072" b="16301" baseLine="16253" forcedEOF="true">

<run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2827" t="16133" r="3710" b="16296">Proceedings</wd>

<space/>

<wd l="3763" t="16133" r="3926" b="16296">of</wd>

<space/>

<wd l="3950" t="16133" r="4162" b="16262">the</wd>

<space/>

<wd l="4200" t="16138" r="4531" b="16262">ACL</wd>

<space/>

<wd l="4574" t="16133" r="4934" b="16262">2015</wd>

<space/>

<wd l="4987" t="16133" r="5688" b="16296">Workshop</wd>

<space/>

<wd l="5741" t="16176" r="5914" b="16262">on</wd>

<space/>

<wd l="5957" t="16138" r="6365" b="16296">Noisy</wd>

<space/>

<wd l="6427" t="16133" r="7536" b="16296">User-generated</wd>

<space/>

</run>

<wd l="7584" t="16138" r="7901" b="16286"><run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Text</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="7954" t="16171" r="8352" b="16301">pages</wd>

<space/>

<wd l="8424" t="16133" r="9072" b="16286">106–110,</wd>

</run>

</ln>

<ln l="3029" t="16296" r="8870" b="16469" baseLine="16425" forcedEOF="true">

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3029" t="16301" r="3595" b="16469">Beijing,</wd>

<space/>

<wd l="3653" t="16301" r="4114" b="16454">China,</wd>

<space/>

<wd l="4166" t="16301" r="4459" b="16469">July</wd>

<space/>

<wd l="4512" t="16301" r="4723" b="16454">31,</wd>

<space/>

<wd l="4781" t="16301" r="5170" b="16430">2015.</wd>

<space/>

</run>

<wd l="5246" t="16296" r="5770" b="16469"><run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">©</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">2015</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="5822" t="16301" r="6672" b="16430">Association</wd>

<space/>

<wd l="6715" t="16301" r="6926" b="16430">for</wd>

<space/>

<wd l="6974" t="16301" r="8035" b="16469">Computational</wd>

<space/>

<wd l="8078" t="16301" r="8870" b="16469">Linguistics</wd>

</run>

</ln>

</para>

</column>

</section>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4316.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1433" marginTop="1263" marginRight="1380" marginBottom="1302" offsetX="8" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1433" t="1263" r="10529" b="15328">

<column l="1433" t="1263" r="5820" b="15328">

<para l="1440" t="1320" r="5808" b="7483" alignment="justified" spaceBefore="21" fli="216" lsp="exactly" lspExact="270" language="en">

<ln l="1656" t="1320" r="5794" b="1478" baseLine="1474" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1656" t="1325" r="1963" b="1478">For</wd>

<space/>

<wd l="2002" t="1320" r="2597" b="1478">further</wd>

<space/>

<wd l="2640" t="1320" r="3782" b="1478">advancement</wd>

<space/>

<wd l="3830" t="1320" r="4022" b="1478">of</wd>

<space/>

<wd l="4051" t="1320" r="4776" b="1478">research</wd>

<space/>

<wd l="4824" t="1373" r="5040" b="1478">on</wd>

<space/>

<wd l="5078" t="1344" r="5405" b="1478">text</wd>

<space/>

<wd l="5448" t="1373" r="5794" b="1478">nor-</wd>

</ln>

<ln l="1440" t="1594" r="5808" b="1795" baseLine="1742" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1440" t="1594" r="2362" b="1752">malization</wd>

<space/>

<wd l="2443" t="1594" r="2755" b="1752">and</wd>

<space/>

<wd l="2832" t="1618" r="3000" b="1752">to</wd>

<space/>

<wd l="3077" t="1594" r="3739" b="1795">provide</wd>

<space/>

<wd l="3826" t="1646" r="3922" b="1752">a</wd>

<space/>

<wd l="3998" t="1646" r="4762" b="1752">common</wd>

<space/>

<wd l="4838" t="1594" r="5808" b="1752">benchmark</wd>

<space/>

</ln>

<ln l="1450" t="1862" r="5794" b="2064" baseLine="2016" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1450" t="1886" r="1896" b="2064">setup</wd>

<space/>

<wd l="1968" t="1862" r="2227" b="2021">for</wd>

<space/>

<wd l="2294" t="1862" r="3230" b="2050">evaluation,</wd>

<space/>

<wd l="3317" t="1915" r="3413" b="2021">a</wd>

<space/>

<wd l="3485" t="1862" r="4046" b="2021">shared</wd>

<space/>

<wd l="4114" t="1862" r="4464" b="2021">task</wd>

<space/>

<wd l="4536" t="1862" r="5462" b="2021">“ACL2015</wd>

<space/>

<wd l="5539" t="1867" r="5794" b="2021">W-</wd>

</ln>

<ln l="1440" t="2098" r="5789" b="2338" baseLine="2281">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1440" t="2141" r="1920" b="2294">NUT:</wd>

<space/>

<wd l="2030" t="2136" r="3288" b="2294">Normalization</wd>

<space/>

<wd l="3384" t="2136" r="3576" b="2294">of</wd>

<space/>

<wd l="3653" t="2136" r="4166" b="2338">Noisy</wd>

<space/>

<wd l="4262" t="2141" r="4646" b="2294">Text</wd>

<space/>

<wd l="4733" t="2136" r="4906" b="2290">in</wd>

<space/>

</run>

<wd l="4992" t="2098" r="5789" b="2294"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">Twitter”</run>

<run underlined="none" subsuperscript="superscript" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">3</run>

</wd>

<run underlined="none" subsuperscript="superscript" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

</run>

</ln>

<ln l="1440" t="2405" r="5794" b="2606" baseLine="2558" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1440" t="2458" r="1771" b="2563">was</wd>

<space/>

<wd l="1882" t="2405" r="2774" b="2606">organized.</wd>

<space/>

<wd l="2995" t="2405" r="3331" b="2563">The</wd>

<space/>

<wd l="3446" t="2405" r="4003" b="2563">shared</wd>

<space/>

<wd l="4104" t="2405" r="4459" b="2563">task</wd>

<space/>

<wd l="4555" t="2405" r="4872" b="2563">had</wd>

<space/>

<wd l="4973" t="2429" r="5294" b="2563">two</wd>

<space/>

<wd l="5400" t="2405" r="5794" b="2563">vari-</wd>

</ln>

<ln l="1445" t="2678" r="5794" b="2837" baseLine="2827" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="2702" r="1834" b="2837">ants:</wd>

<space/>

<wd l="1958" t="2678" r="2962" b="2837">constrained</wd>

<space/>

<wd l="3029" t="2678" r="3514" b="2837">mode</wd>

<space/>

<wd l="3590" t="2678" r="3902" b="2837">and</wd>

<space/>

<wd l="3970" t="2678" r="5198" b="2837">unconstrained</wd>

<space/>

<wd l="5266" t="2678" r="5794" b="2837">mode.</wd>

<space/>

</ln>

<ln l="1440" t="2947" r="5803" b="3149" baseLine="3101" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1440" t="2952" r="1723" b="3106">We</wd>

<space/>

<wd l="1834" t="2947" r="2866" b="3149">participated</wd>

<space/>

<wd l="2976" t="2947" r="3355" b="3149">only</wd>

<space/>

<wd l="3470" t="2947" r="3725" b="3106">for</wd>

<space/>

<wd l="3830" t="2947" r="4099" b="3106">the</wd>

<space/>

<wd l="4210" t="2947" r="5213" b="3106">constrained</wd>

<space/>

<wd l="5318" t="2947" r="5803" b="3106">mode</wd>

<space/>

</ln>

<ln l="1440" t="3221" r="5794" b="3422" baseLine="3370" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1440" t="3221" r="1973" b="3379">which</wd>

<space/>

<wd l="2069" t="3221" r="2342" b="3379">did</wd>

<space/>

<wd l="2429" t="3245" r="2712" b="3379">not</wd>

<space/>

<wd l="2798" t="3221" r="3370" b="3422">permit</wd>

<space/>

<wd l="3456" t="3274" r="3638" b="3379">us</wd>

<space/>

<wd l="3739" t="3245" r="3902" b="3379">to</wd>

<space/>

<wd l="3994" t="3274" r="4286" b="3379">use</wd>

<space/>

<wd l="4378" t="3274" r="4685" b="3422">any</wd>

<space/>

<wd l="4781" t="3221" r="5472" b="3379">external</wd>

<space/>

<wd l="5563" t="3274" r="5794" b="3379">re-</wd>

</ln>

<ln l="1450" t="3490" r="5794" b="3691" baseLine="3638" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1450" t="3542" r="2088" b="3648">sources</wd>

<space/>

<wd l="2208" t="3490" r="2765" b="3648">and/or</wd>

<space/>

<wd l="2866" t="3490" r="3283" b="3648">tools</wd>

<space/>

<wd l="3403" t="3514" r="3965" b="3691">except</wd>

<space/>

<wd l="4070" t="3490" r="4387" b="3648">few</wd>

<space/>

<wd l="4502" t="3490" r="4829" b="3648">that</wd>

<space/>

<wd l="4934" t="3542" r="5357" b="3648">were</wd>

<space/>

<wd l="5467" t="3542" r="5794" b="3648">rec-</wd>

</ln>

<ln l="1445" t="3763" r="5803" b="3965" baseLine="3912" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="3763" r="2410" b="3922">ommended</wd>

<space/>

<wd l="2501" t="3763" r="2717" b="3965">by</wd>

<space/>

<wd l="2808" t="3763" r="3072" b="3922">the</wd>

<space/>

<wd l="3168" t="3763" r="4114" b="3965">organizers.</wd>

<space/>

<wd l="4301" t="3768" r="4483" b="3917">In</wd>

<space/>

<wd l="4570" t="3763" r="4877" b="3922">this</wd>

<space/>

<wd l="4978" t="3816" r="5467" b="3965">paper</wd>

<space/>

<wd l="5549" t="3816" r="5803" b="3922">we</wd>

<space/>

</ln>

<ln l="1440" t="4032" r="5794" b="4234" baseLine="4181" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1440" t="4061" r="1963" b="4234">report</wd>

<space/>

<wd l="2069" t="4085" r="2362" b="4190">our</wd>

<space/>

<wd l="2458" t="4032" r="2904" b="4190">work</wd>

<space/>

<wd l="3010" t="4032" r="3269" b="4190">for</wd>

<space/>

<wd l="3365" t="4032" r="4622" b="4190">normalization.</wd>

<space/>

<wd l="4848" t="4037" r="5131" b="4190">We</wd>

<space/>

<wd l="5237" t="4032" r="5794" b="4234">imple-</wd>

</ln>

<ln l="1440" t="4301" r="5794" b="4502" baseLine="4454" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1440" t="4301" r="2083" b="4459">mented</wd>

<space/>

<wd l="2198" t="4354" r="2294" b="4459">a</wd>

<space/>

<wd l="2405" t="4301" r="2971" b="4502">hybrid</wd>

<space/>

<wd l="3091" t="4325" r="3691" b="4502">system</wd>

<space/>

<wd l="3802" t="4301" r="4334" b="4459">where</wd>

<space/>

<wd l="4445" t="4301" r="5184" b="4459">machine</wd>

<space/>

<wd l="5294" t="4301" r="5794" b="4459">learn-</wd>

</ln>

<ln l="1440" t="4574" r="5803" b="4776" baseLine="4723" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1440" t="4574" r="1714" b="4776">ing</wd>

<space/>

<wd l="1814" t="4574" r="2290" b="4776">along</wd>

<space/>

<wd l="2386" t="4574" r="2774" b="4733">with</wd>

<space/>

<wd l="2866" t="4574" r="3283" b="4733">rules</wd>

<space/>

<wd l="3384" t="4627" r="3648" b="4733">are</wd>

<space/>

<wd l="3739" t="4574" r="4392" b="4733">utilized</wd>

<space/>

<wd l="4483" t="4598" r="4651" b="4733">to</wd>

<space/>

<wd l="4747" t="4574" r="5453" b="4776">perform</wd>

<space/>

<wd l="5539" t="4574" r="5803" b="4733">the</wd>

<space/>

</ln>

<ln l="1440" t="4843" r="5798" b="5045" baseLine="4997" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1440" t="4843" r="1834" b="5002">task.</wd>

<space/>

<wd l="2083" t="4848" r="2366" b="5002">We</wd>

<space/>

<wd l="2482" t="4843" r="2885" b="5002">have</wd>

<space/>

<wd l="3000" t="4843" r="3806" b="5045">exploited</wd>

<space/>

<wd l="3917" t="4843" r="4488" b="5002">lexical</wd>

<space/>

<wd l="4608" t="4843" r="4920" b="5002">and</wd>

<space/>

<wd l="5040" t="4843" r="5798" b="5045">syntactic</wd>

<space/>

</ln>

<ln l="1440" t="5117" r="5784" b="5318" baseLine="5266" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1440" t="5117" r="2304" b="5318">properties</wd>

<space/>

<wd l="2400" t="5117" r="2592" b="5275">of</wd>

<space/>

<wd l="2664" t="5170" r="2760" b="5275">a</wd>

<space/>

<wd l="2842" t="5141" r="3312" b="5275">tweet</wd>

<space/>

<wd l="3398" t="5170" r="3571" b="5275">as</wd>

<space/>

<wd l="3662" t="5117" r="4498" b="5275">discussed</wd>

<space/>

<wd l="4579" t="5117" r="4752" b="5270">in</wd>

<space/>

<wd l="4843" t="5117" r="5453" b="5275">section</wd>

<space/>

<wd l="5539" t="5122" r="5784" b="5275">3.1</wd>

<space/>

</ln>

<ln l="1440" t="5386" r="5803" b="5587" baseLine="5539" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1440" t="5410" r="1608" b="5544">to</wd>

<space/>

<wd l="1694" t="5386" r="2227" b="5544">derive</wd>

<space/>

<wd l="2314" t="5438" r="2410" b="5544">a</wd>

<space/>

<wd l="2486" t="5386" r="3091" b="5544">feature</wd>

<space/>

<wd l="3187" t="5410" r="3418" b="5544">set</wd>

<space/>

<wd l="3499" t="5386" r="3758" b="5544">for</wd>

<space/>

<wd l="3835" t="5386" r="4987" b="5544">identification</wd>

<space/>

<wd l="5074" t="5386" r="5266" b="5544">of</wd>

<space/>

<wd l="5333" t="5386" r="5803" b="5587">noisy</wd>

<space/>

</ln>

<ln l="1440" t="5659" r="5794" b="5861" baseLine="5808" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1440" t="5683" r="1766" b="5818">text</wd>

<space/>

<wd l="1848" t="5659" r="2016" b="5813">in</wd>

<space/>

<wd l="2098" t="5659" r="2362" b="5818">the</wd>

<space/>

<wd l="2448" t="5659" r="2784" b="5818">first</wd>

<space/>

<wd l="2875" t="5683" r="3259" b="5861">step.</wd>

<space/>

<wd l="3422" t="5664" r="3706" b="5818">We</wd>

<space/>

<wd l="3787" t="5659" r="4190" b="5818">train</wd>

<space/>

<wd l="4272" t="5659" r="5294" b="5818">Conditional</wd>

<space/>

<wd l="5381" t="5664" r="5794" b="5818">Ran-</wd>

</ln>

<ln l="1445" t="5928" r="5794" b="6130" baseLine="6082" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="5928" r="1829" b="6086">dom</wd>

<space/>

<wd l="1901" t="5928" r="2347" b="6086">Field</wd>

<space/>

<wd l="2429" t="5933" r="2966" b="6125">(CRF)</wd>

<space/>

<wd l="3058" t="5928" r="3826" b="6130">(Lafferty</wd>

<space/>

<wd l="3902" t="5952" r="4056" b="6086">et</wd>

<space/>

<wd l="4133" t="5928" r="4382" b="6115">al.,</wd>

<space/>

<wd l="4474" t="5933" r="4968" b="6125">2001)</wd>

<space/>

<wd l="5050" t="5981" r="5222" b="6086">as</wd>

<space/>

<wd l="5304" t="5981" r="5400" b="6086">a</wd>

<space/>

<wd l="5467" t="5981" r="5794" b="6086">ma-</wd>

</ln>

<ln l="1445" t="6202" r="5794" b="6403" baseLine="6350" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="6202" r="1910" b="6360">chine</wd>

<space/>

<wd l="2026" t="6202" r="2736" b="6403">learning</wd>

<space/>

<wd l="2856" t="6202" r="3701" b="6403">algorithm</wd>

<space/>

<wd l="3811" t="6226" r="3979" b="6360">to</wd>

<space/>

<wd l="4090" t="6202" r="4766" b="6403">identify</wd>

<space/>

<wd l="4882" t="6202" r="5146" b="6360">the</wd>

<space/>

<wd l="5266" t="6202" r="5794" b="6360">candi-</wd>

</ln>

<ln l="1445" t="6470" r="5808" b="6629" baseLine="6624" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="6470" r="1805" b="6629">date</wd>

<space/>

<wd l="1891" t="6470" r="2842" b="6629">wordforms</wd>

<space/>

<wd l="2938" t="6470" r="3269" b="6629">that</wd>

<space/>

<wd l="3355" t="6470" r="3768" b="6629">need</wd>

<space/>

<wd l="3859" t="6494" r="4022" b="6629">to</wd>

<space/>

<wd l="4118" t="6470" r="4325" b="6629">be</wd>

<space/>

<wd l="4416" t="6470" r="5438" b="6629">normalized.</wd>

<space/>

<wd l="5626" t="6475" r="5808" b="6624">In</wd>

<space/>

</ln>

<ln l="1450" t="6744" r="5798" b="6946" baseLine="6893" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1450" t="6744" r="2050" b="6902">second</wd>

<space/>

<wd l="2136" t="6768" r="2520" b="6946">step,</wd>

<space/>

<wd l="2621" t="6797" r="2875" b="6902">we</wd>

<space/>

<wd l="2962" t="6744" r="3442" b="6946">apply</wd>

<space/>

<wd l="3533" t="6797" r="3984" b="6902">some</wd>

<space/>

<wd l="4066" t="6744" r="4406" b="6902">rule</wd>

<space/>

<wd l="4488" t="6744" r="4987" b="6902">based</wd>

<space/>

<wd l="5064" t="6744" r="5798" b="6902">methods</wd>

<space/>

</ln>

<ln l="1450" t="7013" r="5803" b="7210" baseLine="7162" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1450" t="7018" r="1685" b="7210">(as</wd>

<space/>

<wd l="1781" t="7013" r="2419" b="7171">defined</wd>

<space/>

<wd l="2501" t="7013" r="2674" b="7166">in</wd>

<space/>

<wd l="2765" t="7013" r="3374" b="7171">section</wd>

<space/>

<wd l="3461" t="7018" r="3792" b="7210">3.2)</wd>

<space/>

<wd l="3883" t="7013" r="4056" b="7166">in</wd>

<space/>

<wd l="4142" t="7013" r="4603" b="7171">order</wd>

<space/>

<wd l="4680" t="7037" r="4848" b="7171">to</wd>

<space/>

<wd l="4934" t="7013" r="5803" b="7171">normalize</wd>

<space/>

</ln>

<ln l="1440" t="7282" r="5794" b="7483" baseLine="7435" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1440" t="7282" r="1704" b="7440">the</wd>

<space/>

<wd l="1771" t="7282" r="2717" b="7440">wordforms</wd>

<space/>

<wd l="2789" t="7282" r="3326" b="7440">which</wd>

<space/>

<wd l="3389" t="7334" r="3811" b="7440">were</wd>

<space/>

<wd l="3874" t="7282" r="4699" b="7440">identified</wd>

<space/>

<wd l="4762" t="7282" r="4934" b="7435">in</wd>

<space/>

<wd l="5002" t="7282" r="5338" b="7440">first</wd>

<space/>

<wd l="5410" t="7306" r="5794" b="7483">step.</wd>

</ln>

</para>

<para l="1440" t="7555" r="5808" b="9110" alignment="justified" spaceBefore="7" fli="216" lsp="exactly" lspExact="270" language="en">

<ln l="1656" t="7555" r="5808" b="7757" baseLine="7704" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="7555" r="1997" b="7714">The</wd>

<space/>

<wd l="2074" t="7555" r="3154" b="7757">organization</wd>

<space/>

<wd l="3226" t="7555" r="3418" b="7714">of</wd>

<space/>

<wd l="3475" t="7555" r="3739" b="7714">the</wd>

<space/>

<wd l="3811" t="7608" r="4301" b="7757">paper</wd>

<space/>

<wd l="4363" t="7555" r="4502" b="7714">is</wd>

<space/>

<wd l="4584" t="7608" r="4752" b="7714">as</wd>

<space/>

<wd l="4834" t="7555" r="5525" b="7714">follows.</wd>

<space/>

<wd l="5650" t="7560" r="5808" b="7709">A</wd>

<space/>

</ln>

<ln l="1440" t="7824" r="5808" b="8026" baseLine="7978" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1440" t="7824" r="1867" b="7982">brief</wd>

<space/>

<wd l="1939" t="7824" r="2856" b="7982">theoretical</wd>

<space/>

<wd l="2947" t="7824" r="3854" b="7982">discussion</wd>

<space/>

<wd l="3941" t="7877" r="4157" b="7982">on</wd>

<space/>

<wd l="4248" t="7829" r="4651" b="7982">CRF</wd>

<space/>

<wd l="4738" t="7824" r="4877" b="7982">is</wd>

<space/>

<wd l="4968" t="7824" r="5808" b="8026">presented</wd>

<space/>

</ln>

<ln l="1440" t="8098" r="5803" b="8256" baseLine="8246" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1440" t="8098" r="1613" b="8251">in</wd>

<space/>

<wd l="1694" t="8098" r="2304" b="8256">section</wd>

<space/>

<wd l="2386" t="8102" r="2534" b="8256">2.</wd>

<space/>

<wd l="2678" t="8098" r="3331" b="8256">Section</wd>

<space/>

<wd l="3408" t="8102" r="3499" b="8256">3</wd>

<space/>

<wd l="3595" t="8098" r="4214" b="8256">discuss</wd>

<space/>

<wd l="4301" t="8098" r="4781" b="8256">about</wd>

<space/>

<wd l="4858" t="8098" r="5122" b="8256">the</wd>

<space/>

<wd l="5198" t="8098" r="5803" b="8256">feature</wd>

<space/>

</ln>

<ln l="1450" t="8366" r="5794" b="8568" baseLine="8520" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1450" t="8390" r="1685" b="8525">set</wd>

<space/>

<wd l="1762" t="8366" r="2074" b="8525">and</wd>

<space/>

<wd l="2146" t="8366" r="3293" b="8568">methodology</wd>

<space/>

<wd l="3370" t="8366" r="3773" b="8525">used</wd>

<space/>

<wd l="3845" t="8366" r="4018" b="8520">in</wd>

<space/>

<wd l="4090" t="8366" r="4354" b="8525">the</wd>

<space/>

<wd l="4430" t="8366" r="5232" b="8568">proposed</wd>

<space/>

<wd l="5304" t="8366" r="5794" b="8525">work.</wd>

<space/>

</ln>

<ln l="1440" t="8640" r="5808" b="8842" baseLine="8789" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1440" t="8640" r="2611" b="8842">Experimental</wd>

<space/>

<wd l="2688" t="8640" r="3173" b="8798">result</wd>

<space/>

<wd l="3250" t="8640" r="3562" b="8798">and</wd>

<space/>

<wd l="3634" t="8640" r="4325" b="8842">analysis</wd>

<space/>

<wd l="4406" t="8693" r="4704" b="8798">can</wd>

<space/>

<wd l="4776" t="8640" r="4982" b="8798">be</wd>

<space/>

<wd l="5054" t="8640" r="5568" b="8798">found</wd>

<space/>

<wd l="5635" t="8640" r="5808" b="8794">in</wd>

<space/>

</ln>

<ln l="1450" t="8909" r="5434" b="9110" baseLine="9062" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1450" t="8909" r="2059" b="9067">section</wd>

<space/>

<wd l="2112" t="8914" r="2266" b="9067">4.</wd>

<space/>

<wd l="2342" t="8914" r="2630" b="9067">We</wd>

<space/>

<wd l="2688" t="8909" r="3470" b="9067">conclude</wd>

<space/>

<wd l="3528" t="8909" r="3792" b="9067">the</wd>

<space/>

<wd l="3850" t="8962" r="4334" b="9110">paper</wd>

<space/>

<wd l="4387" t="8909" r="4555" b="9062">in</wd>

<space/>

<wd l="4622" t="8909" r="5232" b="9067">section</wd>

<space/>

<wd l="5290" t="8909" r="5434" b="9067">5.</wd>

</ln>

</para>

<para l="1440" t="9389" r="5227" b="9600" alignment="left" spaceBefore="220" lsp="exactly" lspExact="269" language="en">

<ln l="1440" t="9389" r="5227" b="9600" baseLine="9552" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="4">

<wd l="1440" t="9389" r="1555" b="9557">2</wd>

<space/>

<wd l="1810" t="9389" r="3005" b="9562">Conditional</wd>

<space/>

<wd l="3072" t="9394" r="3941" b="9562">Random</wd>

<space/>

<wd l="4003" t="9389" r="4517" b="9562">Field</wd>

<space/>

<wd l="4594" t="9389" r="5227" b="9600">(CRF)</wd>

</ln>

</para>

<para l="1440" t="9821" r="5808" b="11616" alignment="justified" spaceBefore="170" lsp="exactly" lspExact="270" language="en">

<ln l="1445" t="9821" r="5794" b="10032" baseLine="9984" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="9830" r="2467" b="9989">Conditional</wd>

<space/>

<wd l="2568" t="9830" r="3307" b="9989">Random</wd>

<space/>

<wd l="3403" t="9830" r="3893" b="10018">Field,</wd>

<space/>

<wd l="4013" t="9830" r="4949" b="9989">introduced</wd>

<space/>

<wd l="5045" t="9830" r="5256" b="10032">by</wd>

<space/>

<wd l="5366" t="9830" r="5794" b="10027">(Laf-</wd>

</ln>

<ln l="1440" t="10104" r="5794" b="10306" baseLine="10253" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1440" t="10104" r="1848" b="10306">ferty</wd>

<space/>

<wd l="1906" t="10128" r="2059" b="10262">et</wd>

<space/>

<wd l="2112" t="10104" r="2362" b="10291">al.,</wd>

<space/>

<wd l="2429" t="10109" r="2976" b="10301">2001),</wd>

<space/>

<wd l="3034" t="10104" r="3173" b="10262">is</wd>

<space/>

<wd l="3235" t="10157" r="3331" b="10262">a</wd>

<space/>

<wd l="3374" t="10104" r="3917" b="10262">robust</wd>

<space/>

<wd l="3974" t="10157" r="4762" b="10306">sequence</wd>

<space/>

<wd l="4810" t="10104" r="5525" b="10306">learning</wd>

<space/>

<wd l="5582" t="10104" r="5794" b="10262">al-</wd>

</ln>

<ln l="1445" t="10373" r="5808" b="10574" baseLine="10526" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="10373" r="2136" b="10574">gorithm</wd>

<space/>

<wd l="2194" t="10373" r="2693" b="10531">based</wd>

<space/>

<wd l="2760" t="10426" r="2976" b="10531">on</wd>

<space/>

<wd l="3038" t="10373" r="3307" b="10531">the</wd>

<space/>

<wd l="3374" t="10373" r="4349" b="10531">conditional</wd>

<space/>

<wd l="4416" t="10373" r="5405" b="10574">probability.</wd>

<space/>

<wd l="5515" t="10378" r="5808" b="10531">Let</wd>

<space/>

</ln>

<ln l="1445" t="10642" r="5789" b="10848" baseLine="10797">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1445" t="10699" r="1642" b="10805">an</wd>

<space/>

<wd l="1728" t="10646" r="2741" b="10805">observation</wd>

<space/>

<wd l="2827" t="10699" r="3614" b="10848">sequence</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><wd l="3706" t="10642" r="3859" b="10805">O</wd>

<space/>

</run>

<wd l="3984" t="10680" r="4296" b="10810"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">=</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">&lt;</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="4426" t="10699" r="4666" b="10843"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">o</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">1</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="4718" t="10699" r="4963" b="10843"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">o</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">2</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><space/>

<wd l="5026" t="10776" r="5242" b="10843">...,</wd>

<space/>

</run>

<wd l="5294" t="10699" r="5520" b="10834"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">o</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">T</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><wd l="5650" t="10680" r="5789" b="10810">&gt;</wd>

<space/>

</run>

</ln>

<ln l="1440" t="10915" r="5803" b="11117" baseLine="11069" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1440" t="10915" r="1579" b="11074">is</wd>

<space/>

<wd l="1646" t="10915" r="2160" b="11117">given,</wd>

<space/>

<wd l="2222" t="10915" r="2602" b="11074">then</wd>

<space/>

<wd l="2654" t="10915" r="2918" b="11074">the</wd>

<space/>

<wd l="2976" t="10915" r="3950" b="11074">conditional</wd>

<space/>

<wd l="4008" t="10915" r="4963" b="11117">probability</wd>

<space/>

<wd l="5026" t="10915" r="5218" b="11074">of</wd>

<space/>

<wd l="5261" t="10968" r="5357" b="11074">a</wd>

<space/>

<wd l="5414" t="10939" r="5803" b="11074">state</wd>

<space/>

</ln>

<ln l="1450" t="11184" r="5794" b="11390" baseLine="11340">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1450" t="11242" r="2237" b="11390">sequence</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><wd l="2333" t="11184" r="2467" b="11352">S</wd>

<space/>

</run>

<wd l="2597" t="11222" r="2914" b="11352"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">=</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">&lt;</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="3058" t="11242" r="3293" b="11386"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">s</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">1</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="3350" t="11242" r="3586" b="11386"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">s</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">2</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><space/>

<wd l="3648" t="11318" r="3864" b="11386">...,</wd>

<space/>

</run>

<wd l="3922" t="11242" r="4133" b="11376"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">s</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">T</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><wd l="4282" t="11222" r="4421" b="11352">&gt;</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4526" t="11242" r="4819" b="11347">can</wd>

<space/>

<wd l="4910" t="11189" r="5112" b="11347">be</wd>

<space/>

<wd l="5198" t="11189" r="5794" b="11347">formu-</wd>

</run>

</ln>

<ln l="1440" t="11458" r="2146" b="11616" baseLine="11611" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1440" t="11458" r="1867" b="11616">lated</wd>

<space/>

<wd l="1925" t="11510" r="2146" b="11616">as:</wd>

</ln>

</para>

<para l="3245" t="11885" r="3715" b="11995" alignment="centered" spaceBefore="184" lsp="exactly" lspExact="143" language="en">

<ln l="3245" t="11885" r="3715" b="11995" baseLine="11995" italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="50">

<wd l="3245" t="11885" r="3365" b="11995">T</wd>

<space/>

<wd l="3571" t="11885" r="3715" b="11995">K</wd>

</ln>

</para>

<para l="1445" t="11957" r="5813" b="12341" alignment="left" lsp="exactly" lspExact="299" language="en">

<ln l="1445" t="11957" r="5813" b="12341" baseLine="12250">

<wd l="1445" t="12082" r="2141" b="12307"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="10">P</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="10">(</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="10">S</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="10">|</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="10">O</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="10">)</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="10"><space/>

<wd l="2232" t="12168" r="2381" b="12221">=</wd>

<space/>

<space/>

<wd l="2563" t="11957" r="2640" b="12106">1</wd>

<space/>

</run>

<wd l="2750" t="12058" r="3442" b="12341"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="10">exp</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="10">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="10">Σ</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="10"><space/>

</run>

<run italic="true" underlined="none" subsuperscript="subscript" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="10"><wd l="3509" t="12058" r="3778" b="12341">Σ</wd>

<space/>

</run>

<wd l="3840" t="12096" r="4046" b="12293"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="10">λ</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="10">k</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="10"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="10"><wd l="4138" t="12139" r="4248" b="12250">×</wd>

<space/>

</run>

<wd l="4339" t="12082" r="5059" b="12307"><run underlined="none" subsuperscript="subscript" fontSize="1100" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="10">f</run>

<run italic="true" underlined="none" subsuperscript="subscript" fontSize="1100" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="10">k</run>

<run italic="true" underlined="none" subsuperscript="subscript" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="10">(</run>

<run underlined="none" subsuperscript="subscript" fontSize="1100" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="10">s</run>

<run italic="true" underlined="none" subsuperscript="subscript" fontSize="1100" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="10">t−1</run>

<run underlined="none" subsuperscript="subscript" fontSize="1100" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="10">,</run>

</wd>

<run underlined="none" subsuperscript="subscript" fontSize="1100" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="10"><space/>

</run>

<wd l="5117" t="12149" r="5333" b="12293"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="10">s</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="10">t</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="10">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="10"><space/>

<wd l="5386" t="12149" r="5534" b="12293">o,</wd>

<space/>

</run>

<wd l="5587" t="12082" r="5813" b="12307"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="10">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="10">))</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="10"><nl orig="true"/>

</run>

</ln>

</para>

<para l="2482" t="12245" r="3782" b="12504" alignment="left" li="1008" lsp="exactly" lspExact="203" language="en">

<tabs position="2482"/>

<ln l="2482" t="12245" r="3782" b="12504" baseLine="12472">

<wd l="2482" t="12245" r="2707" b="12437"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="1">Z</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="1">o</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="1"><tab position="2707"/>

<wd l="3168" t="12379" r="3432" b="12494">t=1</wd>

<space/>

<wd l="3490" t="12384" r="3782" b="12504">k=1</wd>

</run>

</ln>

</para>

<para l="1440" t="12533" r="5808" b="14904" alignment="justified" spaceAfter="123" fli="4104" lsp="exactly" lspExact="267" language="en">

<ln l="5558" t="12533" r="5794" b="12730" baseLine="12686" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="5558" t="12538" r="5794" b="12730">(1)</wd>

<space/>

</ln>

<ln l="1440" t="12806" r="5808" b="13008" baseLine="12956">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1440" t="12806" r="1973" b="12965">where</wd>

<space/>

</run>

<wd l="2083" t="12806" r="2290" b="12998"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">λ</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">k</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="2405" t="12806" r="2544" b="12965">is</wd>

<space/>

<wd l="2654" t="12806" r="2918" b="12965">the</wd>

<space/>

<wd l="3024" t="12806" r="3619" b="13008">weight</wd>

<space/>

<wd l="3725" t="12806" r="3912" b="12965">of</wd>

<space/>

<wd l="4003" t="12806" r="4267" b="12965">the</wd>

<space/>

<wd l="4373" t="12806" r="4978" b="12965">feature</wd>

<space/>

<wd l="5078" t="12806" r="5808" b="12965">function</wd>

<space/>

</run>

</ln>

<ln l="1450" t="13061" r="5794" b="13286" baseLine="13231">

<wd l="1450" t="13061" r="2170" b="13286"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">f</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">k</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">(</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">s</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">t−1</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2"><space/>

</run>

<wd l="2232" t="13128" r="2443" b="13272"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">s</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">t</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2"><space/>

<wd l="2496" t="13128" r="2645" b="13272">o,</wd>

<space/>

</run>

<wd l="2698" t="13061" r="2899" b="13286"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">)</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="2971" t="13075" r="3298" b="13234">that</wd>

<space/>

<wd l="3355" t="13075" r="3494" b="13234">is</wd>

<space/>

<wd l="3557" t="13099" r="3725" b="13234">to</wd>

<space/>

<wd l="3787" t="13075" r="3989" b="13234">be</wd>

<space/>

<wd l="4046" t="13075" r="4690" b="13234">learned</wd>

<space/>

<wd l="4747" t="13075" r="5021" b="13234">via</wd>

<space/>

<wd l="5074" t="13075" r="5794" b="13277">training.</wd>

<space/>

</run>

</ln>

<ln l="1440" t="13349" r="5808" b="13550" baseLine="13498" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="13354" r="1622" b="13502">In</wd>

<space/>

<wd l="1670" t="13349" r="2352" b="13550">general,</wd>

<space/>

<wd l="2414" t="13349" r="3019" b="13507">feature</wd>

<space/>

<wd l="3062" t="13349" r="3869" b="13507">functions</wd>

<space/>

<wd l="3922" t="13349" r="4358" b="13507">takes</wd>

<space/>

<wd l="4416" t="13349" r="4968" b="13550">binary</wd>

<space/>

<wd l="5016" t="13349" r="5486" b="13507">value</wd>

<space/>

<wd l="5530" t="13349" r="5808" b="13507">but</wd>

<space/>

</ln>

<ln l="1445" t="13618" r="5803" b="13819" baseLine="13771">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1445" t="13642" r="1598" b="13776">at</wd>

<space/>

<wd l="1670" t="13618" r="2131" b="13776">times</wd>

<space/>

<wd l="2213" t="13618" r="2338" b="13776">it</wd>

<space/>

<wd l="2405" t="13670" r="2779" b="13819">may</wd>

<space/>

<wd l="2851" t="13670" r="3336" b="13819">range</wd>

<space/>

<wd l="3408" t="13618" r="4138" b="13776">between</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2"><wd l="4219" t="13670" r="4584" b="13776">−∞</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="4666" t="13642" r="4834" b="13776">to</wd>

<space/>

</run>

<wd l="4915" t="13642" r="5338" b="13790"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">+</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">∞</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="5467" t="13618" r="5803" b="13776">The</wd>

<space/>

</run>

</ln>

<ln l="1445" t="13891" r="5794" b="14093" baseLine="14040" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="13915" r="2002" b="14093">output</wd>

<space/>

<wd l="2083" t="13891" r="2275" b="14050">of</wd>

<space/>

<wd l="2342" t="13891" r="2650" b="14050">this</wd>

<space/>

<wd l="2736" t="13891" r="3466" b="14050">function</wd>

<space/>

<wd l="3542" t="13891" r="4008" b="14050">relies</wd>

<space/>

<wd l="4104" t="13944" r="4315" b="14050">on</wd>

<space/>

<wd l="4402" t="13891" r="4992" b="14050">certain</wd>

<space/>

<wd l="5078" t="13915" r="5472" b="14050">state</wd>

<space/>

<wd l="5558" t="13944" r="5794" b="14050">se-</wd>

</ln>

<ln l="1445" t="14160" r="5794" b="14362" baseLine="14312">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1445" t="14213" r="2059" b="14362">quence</wd>

<space/>

<wd l="2150" t="14160" r="2410" b="14318">i.e.</wd>

<space/>

</run>

<wd l="2616" t="14213" r="3043" b="14357"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">s</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">t−1</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2"><space/>

</run>

<wd l="3106" t="14213" r="3254" b="14352"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">s</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">t</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="3365" t="14160" r="3677" b="14318">and</wd>

<space/>

<wd l="3778" t="14160" r="4786" b="14318">observation</wd>

<space/>

<wd l="4877" t="14160" r="5794" b="14362">properties.</wd>

<space/>

</run>

</ln>

<ln l="1440" t="14434" r="5794" b="14635" baseLine="14582">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1440" t="14434" r="1781" b="14592">The</wd>

<space/>

<wd l="1834" t="14434" r="3048" b="14592">normalization</wd>

<space/>

<wd l="3106" t="14434" r="3614" b="14592">factor</wd>

<space/>

</run>

<wd l="3677" t="14434" r="3950" b="14621"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">Z</run>

<run italic="true" underlined="none" subsuperscript="subscript" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">o</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="4022" t="14434" r="4550" b="14592">define</wd>

<space/>

<wd l="4608" t="14434" r="4781" b="14587">in</wd>

<space/>

<wd l="4838" t="14434" r="5587" b="14635">equation</wd>

<space/>

<wd l="5645" t="14438" r="5794" b="14621">2,</wd>

<space/>

</run>

</ln>

<ln l="1440" t="14702" r="5808" b="14904" baseLine="14851" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="14702" r="1579" b="14861">is</wd>

<space/>

<wd l="1661" t="14702" r="2064" b="14861">used</wd>

<space/>

<wd l="2136" t="14726" r="2304" b="14861">to</wd>

<space/>

<wd l="2381" t="14702" r="2851" b="14861">make</wd>

<space/>

<wd l="2933" t="14702" r="3134" b="14861">all</wd>

<space/>

<wd l="3226" t="14702" r="4200" b="14861">conditional</wd>

<space/>

<wd l="4277" t="14702" r="5357" b="14904">probabilities</wd>

<space/>

<wd l="5453" t="14755" r="5808" b="14861">sum</wd>

</ln>

</para>

<rulerline l="1433" t="15048" r="2640" b="15048" type="single" width="10" color="000000"/>

<para l="1699" t="15101" r="3638" b="15302" alignment="left" li="216" spaceBefore="56" spaceAfter="14" lsp="exactly" lspExact="209" language="en">

<ln l="1699" t="15101" r="3638" b="15302" baseLine="15256">

<wd l="1699" t="15101" r="3638" b="15302"><run underlined="none" subsuperscript="superscript" fontSize="600" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">3</run>

<run underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">http://noisy-text.github.io/</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><nl orig="true"/>

</run>

</ln>

</para>

</column>

<column l="6142" t="1263" r="10529" b="15328">

<para l="6144" t="1320" r="10507" b="1795" alignment="justified" lsp="exactly" lspExact="269" language="en">

<ln l="6144" t="1320" r="10507" b="1522" baseLine="1474" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="1373" r="6360" b="1522">up</wd>

<space/>

<wd l="6422" t="1344" r="6590" b="1478">to</wd>

<space/>

<wd l="6653" t="1320" r="7094" b="1522">unity</wd>

<space/>

<wd l="7162" t="1320" r="7474" b="1478">and</wd>

<space/>

<wd l="7536" t="1373" r="7829" b="1478">can</wd>

<space/>

<wd l="7891" t="1320" r="8098" b="1478">be</wd>

<space/>

<wd l="8160" t="1320" r="9043" b="1478">calculated</wd>

<space/>

<wd l="9106" t="1320" r="9974" b="1522">efficiently</wd>

<space/>

<wd l="10037" t="1320" r="10507" b="1522">using</wd>

<space/>

</ln>

<ln l="6149" t="1594" r="8184" b="1795" baseLine="1742" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="1594" r="6893" b="1795">dynamic</wd>

<space/>

<wd l="6950" t="1594" r="8184" b="1795">programming.</wd>

</ln>

</para>

<para l="8314" t="2251" r="10286" b="2477" alignment="left" spaceBefore="416" lsp="exactly" lspExact="277" language="en" id="_1_2_107">

<ln l="8314" t="2251" r="10286" b="2477" baseLine="2427">

<wd l="8314" t="2266" r="8515" b="2462"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">λ</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">k</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><wd l="8611" t="2309" r="8722" b="2419">×</wd>

<space/>

</run>

<wd l="8808" t="2251" r="9533" b="2477"><run underlined="none" subsuperscript="subscript" fontSize="1100" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">f</run>

<run italic="true" underlined="none" subsuperscript="subscript" fontSize="1100" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">k</run>

<run italic="true" underlined="none" subsuperscript="subscript" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run underlined="none" subsuperscript="subscript" fontSize="1100" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">s</run>

<run italic="true" underlined="none" subsuperscript="subscript" fontSize="1100" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">t−1</run>

<run underlined="none" subsuperscript="subscript" fontSize="1100" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="subscript" fontSize="1100" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="9590" t="2318" r="9802" b="2462"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">s</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">t</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><space/>

<wd l="9854" t="2318" r="10003" b="2462">o,</wd>

<space/>

</run>

<wd l="10056" t="2251" r="10286" b="2477"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">))</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

<para l="10267" t="2707" r="10502" b="2899" alignment="right" spaceBefore="151" lsp="exactly" lspExact="270" language="en">

<ln l="10267" t="2707" r="10502" b="2899" baseLine="2856" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="10267" t="2707" r="10502" b="2899">(2)</wd>

</ln>

</para>

<para l="6144" t="3168" r="7387" b="3341" alignment="left" spaceBefore="210" lsp="exactly" lspExact="269" language="en">

<ln l="6144" t="3168" r="7387" b="3341" baseLine="3336" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="17">

<wd l="6144" t="3168" r="6259" b="3341">3</wd>

<space/>

<wd l="6504" t="3173" r="7387" b="3341">Methods</wd>

</ln>

</para>

<para l="6144" t="3605" r="10512" b="7603" alignment="justified" spaceBefore="165" spaceAfter="80" lsp="exactly" lspExact="270" language="en">

<ln l="6144" t="3605" r="10512" b="3806" baseLine="3758" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6144" t="3605" r="6610" b="3763">After</wd>

<space/>

<wd l="6720" t="3605" r="7618" b="3806">discussing</wd>

<space/>

<wd l="7728" t="3605" r="8650" b="3763">theoretical</wd>

<space/>

<wd l="8765" t="3629" r="9307" b="3806">aspect</wd>

<space/>

<wd l="9418" t="3605" r="9605" b="3763">of</wd>

<space/>

<wd l="9706" t="3610" r="10138" b="3792">CRF,</wd>

<space/>

<wd l="10258" t="3658" r="10512" b="3763">we</wd>

<space/>

</ln>

<ln l="6144" t="3878" r="10502" b="4080" baseLine="4027" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6144" t="3931" r="6514" b="4037">now</wd>

<space/>

<wd l="6576" t="3878" r="7296" b="4037">describe</wd>

<space/>

<wd l="7358" t="3931" r="7646" b="4037">our</wd>

<space/>

<wd l="7699" t="3878" r="8846" b="4080">methodology</wd>

<space/>

<wd l="8904" t="3878" r="9230" b="4037">that</wd>

<space/>

<wd l="9283" t="3931" r="9538" b="4037">we</wd>

<space/>

<wd l="9595" t="3931" r="9883" b="4037">use</wd>

<space/>

<wd l="9941" t="3902" r="10104" b="4037">to</wd>

<space/>

<wd l="10166" t="3931" r="10502" b="4080">per-</wd>

</ln>

<ln l="6144" t="4147" r="10498" b="4349" baseLine="4301" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6144" t="4147" r="6571" b="4306">form</wd>

<space/>

<wd l="6614" t="4171" r="6941" b="4306">text</wd>

<space/>

<wd l="6984" t="4147" r="8237" b="4306">normalization.</wd>

<space/>

<wd l="8314" t="4152" r="8448" b="4306">It</wd>

<space/>

<wd l="8496" t="4147" r="9370" b="4349">comprises</wd>

<space/>

<wd l="9427" t="4147" r="9619" b="4306">of</wd>

<space/>

<wd l="9648" t="4171" r="9970" b="4306">two</wd>

<space/>

<wd l="10032" t="4171" r="10498" b="4349">steps.</wd>

<space/>

</ln>

<ln l="6144" t="4421" r="10502" b="4622" baseLine="4570" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6144" t="4421" r="6547" b="4579">First</wd>

<space/>

<wd l="6653" t="4445" r="6994" b="4622">step</wd>

<space/>

<wd l="7099" t="4421" r="7776" b="4579">consists</wd>

<space/>

<wd l="7891" t="4421" r="8078" b="4579">of</wd>

<space/>

<wd l="8165" t="4421" r="8842" b="4622">training</wd>

<space/>

<wd l="8947" t="4474" r="9043" b="4579">a</wd>

<space/>

<wd l="9149" t="4421" r="10075" b="4622">supervised</wd>

<space/>

<wd l="10171" t="4474" r="10502" b="4579">ma-</wd>

</ln>

<ln l="6149" t="4690" r="10507" b="4891" baseLine="4838" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6149" t="4690" r="6619" b="4848">chine</wd>

<space/>

<wd l="6662" t="4690" r="7373" b="4891">learning</wd>

<space/>

<wd l="7421" t="4690" r="7963" b="4848">model</wd>

<space/>

<wd l="8011" t="4690" r="8266" b="4848">for</wd>

<space/>

<wd l="8309" t="4690" r="8573" b="4848">the</wd>

<space/>

<wd l="8616" t="4690" r="9773" b="4848">identification</wd>

<space/>

<wd l="9816" t="4690" r="10008" b="4848">of</wd>

<space/>

<wd l="10037" t="4690" r="10507" b="4891">noisy</wd>

<space/>

</ln>

<ln l="6144" t="4958" r="10512" b="5160" baseLine="5112" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6144" t="4982" r="6514" b="5117">text.</wd>

<space/>

<wd l="6701" t="4963" r="6984" b="5117">We</wd>

<space/>

<wd l="7075" t="4958" r="8011" b="5160">implement</wd>

<space/>

<wd l="8102" t="5011" r="8198" b="5117">a</wd>

<space/>

<wd l="8294" t="4982" r="8530" b="5117">set</wd>

<space/>

<wd l="8621" t="4958" r="8813" b="5117">of</wd>

<space/>

<wd l="8890" t="4958" r="9571" b="5117">features</wd>

<space/>

<wd l="9672" t="4958" r="9998" b="5117">that</wd>

<space/>

<wd l="10085" t="5011" r="10512" b="5117">were</wd>

<space/>

</ln>

<ln l="6144" t="5232" r="10502" b="5434" baseLine="5381" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6144" t="5232" r="6734" b="5434">mostly</wd>

<space/>

<wd l="6835" t="5232" r="7474" b="5390">derived</wd>

<space/>

<wd l="7565" t="5232" r="8232" b="5390">without</wd>

<space/>

<wd l="8318" t="5232" r="8789" b="5434">using</wd>

<space/>

<wd l="8885" t="5285" r="9192" b="5434">any</wd>

<space/>

<wd l="9288" t="5232" r="9691" b="5434">deep</wd>

<space/>

<wd l="9787" t="5232" r="10502" b="5390">domain-</wd>

</ln>

<ln l="6154" t="5501" r="10512" b="5702" baseLine="5654" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6154" t="5501" r="6806" b="5702">specific</wd>

<space/>

<wd l="6864" t="5554" r="7680" b="5659">resources</wd>

<space/>

<wd l="7742" t="5501" r="8299" b="5659">and/or</wd>

<space/>

<wd l="8342" t="5501" r="8813" b="5659">tools.</wd>

<space/>

<wd l="8890" t="5506" r="9173" b="5659">We</wd>

<space/>

<wd l="9226" t="5501" r="9931" b="5702">perform</wd>

<space/>

<wd l="9984" t="5501" r="10512" b="5659">3-fold</wd>

<space/>

</ln>

<ln l="6149" t="5774" r="10512" b="5976" baseLine="5923" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6149" t="5827" r="6586" b="5933">cross</wd>

<space/>

<wd l="6662" t="5774" r="7531" b="5933">validation</wd>

<space/>

<wd l="7603" t="5827" r="7819" b="5933">on</wd>

<space/>

<wd l="7886" t="5774" r="8150" b="5933">the</wd>

<space/>

<wd l="8222" t="5774" r="8894" b="5976">training</wd>

<space/>

<wd l="8971" t="5774" r="9336" b="5933">data</wd>

<space/>

<wd l="9398" t="5798" r="9566" b="5933">to</wd>

<space/>

<wd l="9643" t="5774" r="10512" b="5933">determine</wd>

<space/>

</ln>

<ln l="6144" t="6043" r="10498" b="6245" baseLine="6197" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6144" t="6043" r="6413" b="6202">the</wd>

<space/>

<wd l="6485" t="6043" r="6835" b="6202">best</wd>

<space/>

<wd l="6902" t="6043" r="7507" b="6202">feature</wd>

<space/>

<wd l="7584" t="6043" r="8712" b="6202">combination.</wd>

<space/>

<wd l="8842" t="6048" r="9024" b="6197">In</wd>

<space/>

<wd l="9091" t="6043" r="9360" b="6202">the</wd>

<space/>

<wd l="9437" t="6043" r="10037" b="6202">second</wd>

<space/>

<wd l="10114" t="6067" r="10498" b="6245">step,</wd>

<space/>

</ln>

<ln l="6144" t="6317" r="10512" b="6518" baseLine="6466" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6144" t="6317" r="6907" b="6518">potential</wd>

<space/>

<wd l="7013" t="6317" r="7920" b="6475">candidates</wd>

<space/>

<wd l="8026" t="6317" r="8851" b="6475">identified</wd>

<space/>

<wd l="8947" t="6341" r="9110" b="6475">to</wd>

<space/>

<wd l="9216" t="6317" r="9418" b="6475">be</wd>

<space/>

<wd l="9518" t="6317" r="9984" b="6518">noisy</wd>

<space/>

<wd l="10085" t="6370" r="10512" b="6475">were</wd>

<space/>

</ln>

<ln l="6149" t="6586" r="10502" b="6787" baseLine="6739" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6149" t="6586" r="6912" b="6787">analysed</wd>

<space/>

<wd l="6994" t="6586" r="7306" b="6744">and</wd>

<space/>

<wd l="7397" t="6586" r="8525" b="6787">subsequently</wd>

<space/>

<wd l="8606" t="6586" r="9470" b="6787">processed</wd>

<space/>

<wd l="9552" t="6586" r="10018" b="6787">using</wd>

<space/>

<wd l="10104" t="6586" r="10502" b="6744">vari-</wd>

</ln>

<ln l="6149" t="6859" r="10502" b="7061" baseLine="7008" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6149" t="6912" r="6442" b="7018">ous</wd>

<space/>

<wd l="6523" t="6859" r="7267" b="7018">heuristic</wd>

<space/>

<wd l="7349" t="6859" r="7848" b="7018">based</wd>

<space/>

<wd l="7920" t="6859" r="8338" b="7018">rules</wd>

<space/>

<wd l="8424" t="6859" r="8678" b="7018">for</wd>

<space/>

<wd l="8750" t="6859" r="10008" b="7018">normalization.</wd>

<space/>

<wd l="10147" t="6859" r="10502" b="7061">Fig-</wd>

</ln>

<ln l="6144" t="7128" r="10512" b="7330" baseLine="7282" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6144" t="7181" r="6422" b="7286">ure</wd>

<space/>

<wd l="6523" t="7133" r="6586" b="7282">1</wd>

<space/>

<wd l="6686" t="7128" r="7291" b="7330">depicts</wd>

<space/>

<wd l="7387" t="7128" r="8242" b="7286">schematic</wd>

<space/>

<wd l="8328" t="7128" r="9043" b="7330">diagram</wd>

<space/>

<wd l="9120" t="7128" r="9307" b="7286">of</wd>

<space/>

<wd l="9370" t="7128" r="9634" b="7286">the</wd>

<space/>

<wd l="9710" t="7128" r="10512" b="7330">proposed</wd>

<space/>

</ln>

<ln l="6154" t="7406" r="6797" b="7603" baseLine="7550" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6154" t="7426" r="6797" b="7603">system.</wd>

</ln>

</para>

<para l="6144" t="11366" r="10502" b="11842" alignment="justified" spaceBefore="78" lsp="exactly" lspExact="270" language="en">

<ln l="6144" t="11366" r="10502" b="11568" baseLine="11520" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="11366" r="6715" b="11568">Figure</wd>

<space/>

<wd l="6821" t="11371" r="6950" b="11525">1:</wd>

<space/>

<wd l="7171" t="11366" r="7987" b="11568">Proposed</wd>

<space/>

<wd l="8069" t="11366" r="9250" b="11568">methodology.</wd>

<space/>

<wd l="9413" t="11366" r="10008" b="11525">Dotted</wd>

<space/>

<wd l="10085" t="11366" r="10502" b="11525">hori-</wd>

</ln>

<ln l="6149" t="11640" r="8827" b="11842" baseLine="11789" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="11640" r="6677" b="11798">zontal</wd>

<space/>

<wd l="6734" t="11640" r="7061" b="11798">line</wd>

<space/>

<wd l="7123" t="11664" r="7906" b="11842">separates</wd>

<space/>

<wd l="7968" t="11664" r="8290" b="11798">two</wd>

<space/>

<wd l="8357" t="11664" r="8827" b="11842">steps.</wd>

</ln>

</para>

<para l="6144" t="12466" r="7704" b="12624" alignment="left" spaceBefore="577" lsp="exactly" lspExact="254" language="en">

<ln l="6144" t="12466" r="7704" b="12624" baseLine="12619" bold="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="5">

<wd l="6144" t="12466" r="6408" b="12624">3.1</wd>

<space/>

<wd l="6638" t="12470" r="7354" b="12624">Feature</wd>

<space/>

<wd l="7411" t="12466" r="7704" b="12624">Set</wd>

</ln>

</para>

<para l="6144" t="12821" r="10517" b="14918" alignment="justified" spaceBefore="86" spaceAfter="108" lsp="exactly" lspExact="270" language="en">

<ln l="6144" t="12821" r="10502" b="12979" baseLine="12970" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6144" t="12821" r="6523" b="12979">This</wd>

<space/>

<wd l="6600" t="12821" r="7210" b="12979">section</wd>

<space/>

<wd l="7272" t="12821" r="8074" b="12979">describes</wd>

<space/>

<wd l="8136" t="12821" r="8405" b="12979">the</wd>

<space/>

<wd l="8462" t="12821" r="9067" b="12979">feature</wd>

<space/>

<wd l="9134" t="12845" r="9370" b="12979">set</wd>

<space/>

<wd l="9427" t="12821" r="9758" b="12979">that</wd>

<space/>

<wd l="9811" t="12874" r="10142" b="12979">was</wd>

<space/>

<wd l="10210" t="12821" r="10502" b="12974">im-</wd>

</ln>

<ln l="6144" t="13090" r="10502" b="13291" baseLine="13243" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6144" t="13090" r="7056" b="13291">plemented</wd>

<space/>

<wd l="7114" t="13090" r="7373" b="13248">for</wd>

<space/>

<wd l="7426" t="13090" r="8381" b="13291">identifying</wd>

<space/>

<wd l="8443" t="13090" r="8707" b="13248">the</wd>

<space/>

<wd l="8770" t="13090" r="9528" b="13291">potential</wd>

<space/>

<wd l="9595" t="13090" r="10502" b="13248">candidates</wd>

<space/>

</ln>

<ln l="6144" t="13363" r="10502" b="13522" baseLine="13512" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6144" t="13363" r="6475" b="13522">that</wd>

<space/>

<wd l="6552" t="13363" r="6965" b="13522">need</wd>

<space/>

<wd l="7042" t="13387" r="7210" b="13522">to</wd>

<space/>

<wd l="7291" t="13363" r="7498" b="13522">be</wd>

<space/>

<wd l="7579" t="13363" r="8602" b="13522">normalized.</wd>

<space/>

<wd l="8760" t="13363" r="9034" b="13517">All</wd>

<space/>

<wd l="9115" t="13363" r="9384" b="13522">the</wd>

<space/>

<wd l="9461" t="13363" r="10142" b="13522">features</wd>

<space/>

<wd l="10238" t="13363" r="10502" b="13522">de-</wd>

</ln>

<ln l="6149" t="13632" r="10517" b="13834" baseLine="13781" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6149" t="13632" r="6586" b="13790">fined</wd>

<space/>

<wd l="6648" t="13685" r="6907" b="13790">are</wd>

<space/>

<wd l="6974" t="13632" r="8765" b="13834">domain-independent</wd>

<space/>

<wd l="8822" t="13632" r="8995" b="13786">in</wd>

<space/>

<wd l="9053" t="13656" r="9638" b="13790">nature.</wd>

<space/>

<wd l="9734" t="13637" r="9998" b="13790">No</wd>

<space/>

<wd l="10066" t="13632" r="10517" b="13790">other</wd>

<space/>

</ln>

<ln l="6149" t="13906" r="10512" b="14107" baseLine="14054" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6149" t="13906" r="6840" b="14064">external</wd>

<space/>

<wd l="6898" t="13958" r="7714" b="14064">resources</wd>

<space/>

<wd l="7776" t="13906" r="8333" b="14064">and/or</wd>

<space/>

<wd l="8381" t="13906" r="8846" b="14093">tools,</wd>

<space/>

<wd l="8909" t="13906" r="9298" b="14064">with</wd>

<space/>

<wd l="9350" t="13906" r="9614" b="14064">the</wd>

<space/>

<wd l="9672" t="13906" r="10512" b="14107">exception</wd>

<space/>

</ln>

<ln l="6149" t="14141" r="10502" b="14376" baseLine="14321">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="6149" t="14174" r="6341" b="14333">of</wd>

<space/>

<wd l="6408" t="14174" r="7368" b="14376">vocabulary</wd>

<space/>

<wd l="7454" t="14174" r="7646" b="14333">of</wd>

<space/>

<wd l="7714" t="14174" r="8237" b="14333">words</wd>

<space/>

</run>

<wd l="8328" t="14141" r="8462" b="14362"><run underlined="none" subsuperscript="superscript" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">4</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="8558" t="14227" r="8986" b="14333">were</wd>

<space/>

<wd l="9067" t="14174" r="9466" b="14333">used</wd>

<space/>

<wd l="9547" t="14174" r="9720" b="14328">in</wd>

<space/>

<wd l="9797" t="14174" r="10066" b="14333">the</wd>

<space/>

<wd l="10147" t="14227" r="10502" b="14376">pro-</wd>

</run>

</ln>

<ln l="6144" t="14443" r="10502" b="14645" baseLine="14597" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6144" t="14443" r="6658" b="14645">posed</wd>

<space/>

<wd l="6725" t="14443" r="7214" b="14602">work.</wd>

<space/>

<wd l="7339" t="14443" r="8227" b="14645">Following</wd>

<space/>

<wd l="8304" t="14496" r="8568" b="14602">are</wd>

<space/>

<wd l="8635" t="14443" r="8904" b="14602">the</wd>

<space/>

<wd l="8976" t="14443" r="9398" b="14602">brief</wd>

<space/>

<wd l="9461" t="14443" r="10502" b="14645">descriptions</wd>

<space/>

</ln>

<ln l="6149" t="14717" r="8630" b="14918" baseLine="14866" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6149" t="14717" r="6341" b="14875">of</wd>

<space/>

<wd l="6384" t="14717" r="6648" b="14875">the</wd>

<space/>

<wd l="6701" t="14717" r="7843" b="14918">implemented</wd>

<space/>

<wd l="7896" t="14717" r="8630" b="14875">features.</wd>

</ln>

</para>

<rulerline l="6142" t="15048" r="7344" b="15048" type="single" width="10" color="000000"/>

<para l="6398" t="15101" r="10027" b="15302" alignment="left" li="216" spaceBefore="57" spaceAfter="13" lsp="exactly" lspExact="209" language="en">

<ln l="6398" t="15101" r="10027" b="15302" baseLine="15257">

<wd l="6398" t="15101" r="10027" b="15302"><run underlined="none" subsuperscript="none" fontSize="600" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">4</run>

<run underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">http://noisy-text.github.io/files/scowl.american.70</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4"><nl orig="true"/>

</run>

</ln>

</para>

</column>

</section>

<dd l="6142" t="1883" r="8314" b="3043" anchorTo="toPage" refTo="_1_2_107">

<dd l="6142" t="1883" r="7222" b="3043" anchorTo="toPage" refTo="_1_2_107">

<para l="6898" t="2078" r="7166" b="2510" alignment="right" spaceBefore="156" lsp="exactly" lspExact="330" language="en">

<ln l="6898" t="2078" r="7166" b="2510" baseLine="2222" underlined="none" subsuperscript="subscript" fontSize="1100" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="0">

<wd l="6898" t="2227" r="7166" b="2510">∑</wd>

</ln>

</para>

<para l="6365" t="2266" r="6816" b="2458" alignment="left" li="216" lsp="exactly" lspExact="125" language="en">

<ln l="6365" t="2266" r="6816" b="2458" baseLine="2430">

<wd l="6365" t="2266" r="6586" b="2458"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="9">Z</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="9">0</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="9"><space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="9"><wd l="6667" t="2338" r="6816" b="2390">=</wd>

</run>

</ln>

</para>

<para l="6998" t="2496" r="7066" b="2650" alignment="right" ri="72" spaceBefore="2" spaceAfter="356" lsp="exactly" lspExact="186" language="en">

<ln l="6998" t="2496" r="7066" b="2650" baseLine="2640" italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="0">

<wd l="6998" t="2573" r="7066" b="2650">s</wd>

</ln>

</para>

</dd>

<dd l="7632" t="1883" r="7963" b="2682" anchorTo="toPage" refTo="_1_2_107">

<para l="7652" t="2054" r="7904" b="2165" alignment="left" spaceBefore="141" lsp="exactly" lspExact="183" language="en">

<ln l="7718" t="2054" r="7838" b="2165" baseLine="2165" italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="0">

<wd l="7718" t="2054" r="7838" b="2165">T</wd>

</ln>

</para>

<para l="7632" t="2227" r="7963" b="2510" alignment="left" spaceBefore="20" lsp="exactly" lspExact="317" language="en">

<ln l="7642" t="2227" r="7910" b="2510" baseLine="2510" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="7642" t="2227" r="7910" b="2510">∑</wd>

</ln>

</para>

<para l="7632" t="2549" r="7963" b="2664" alignment="left" lsp="exactly" lspExact="134" language="en">

<ln l="7637" t="2549" r="7906" b="2664" baseLine="2659" italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="-5">

<wd l="7637" t="2549" r="7906" b="2664">t=1</wd>

</ln>

</para>

</dd>

<dd l="7222" t="1883" r="7632" b="2682" anchorTo="toPage" refTo="_1_2_107">

<para l="7222" t="2251" r="7632" b="2477" alignment="left" spaceBefore="330" spaceAfter="193" lsp="exactly" lspExact="272" language="en">

<ln l="7224" t="2251" r="7627" b="2477" baseLine="2419">

<wd l="7224" t="2251" r="7627" b="2477"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-18">exp</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-18">(</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-18"><nl orig="true"/>

</run>

</ln>

</para>

</dd>

<dd l="7963" t="1883" r="8314" b="2682" anchorTo="toPage" refTo="_1_2_107">

<para l="7974" t="2054" r="8250" b="2165" alignment="left" spaceBefore="141" lsp="exactly" lspExact="183" language="en">

<ln l="8040" t="2054" r="8184" b="2165" baseLine="2165" italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="0">

<wd l="8040" t="2054" r="8184" b="2165">K</wd>

</ln>

</para>

<para l="7963" t="2227" r="8312" b="2510" alignment="left" spaceBefore="20" lsp="exactly" lspExact="319" language="en">

<ln l="7978" t="2227" r="8246" b="2510" baseLine="2510" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="7978" t="2227" r="8246" b="2510">∑</wd>

</ln>

</para>

<para l="7963" t="2549" r="8314" b="2674" alignment="left" lsp="exactly" lspExact="132" language="en">

<ln l="7963" t="2549" r="8256" b="2674" baseLine="2664" italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="-9">

<wd l="7963" t="2554" r="8256" b="2674">k=1</wd>

</ln>

</para>

</dd>

</dd>

<dd l="6142" t="7872" r="10675" b="11088" topDistance="181" bottomDistance="142">

<picture l="6144" t="7872" r="10675" b="11088" alignment="left" li="2">

</picture>

</dd>

<dd l="1433" t="15736" r="10529" b="15977">

<para l="5771" t="15792" r="6196" b="15946" alignment="centered" spaceBefore="4" lsp="exactly" lspExact="229" language="en">

<ln l="5837" t="15792" r="6130" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="24">

<wd l="5837" t="15792" r="6130" b="15946">107</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4316.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1440" marginTop="1256" marginRight="567" marginBottom="1292" offsetX="8" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1440" t="1256" r="11342" b="15331">

<column l="1440" t="1256" r="5822" b="15331">

<para l="1637" t="1320" r="5808" b="2021" alignment="justified" li="432" spaceBefore="5" fli="-216" lsp="exactly" lspExact="271" language="en">

<bullet type="ordered" numChars="3">

</bullet>

<ln l="1637" t="1320" r="5808" b="1478" baseLine="1474">

<run bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4"><wd l="1637" t="1325" r="1877" b="1474">1.</wd>

<space/>

<wd l="1877" t="1325" r="2395" b="1478">Local</wd>

<space/>

<wd l="2453" t="1334" r="3187" b="1478">context:</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4"><wd l="3269" t="1320" r="3763" b="1478">Local</wd>

<space/>

<wd l="3821" t="1320" r="4718" b="1478">contextual</wd>

<space/>

<wd l="4776" t="1320" r="5808" b="1478">information</wd>

<space/>

</run>

</ln>

<ln l="1877" t="1594" r="5798" b="1795" baseLine="1742" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="1877" t="1594" r="2050" b="1747">in</wd>

<space/>

<wd l="2093" t="1594" r="2357" b="1752">the</wd>

<space/>

<wd l="2405" t="1594" r="2904" b="1752">forms</wd>

<space/>

<wd l="2966" t="1594" r="3154" b="1752">of</wd>

<space/>

<wd l="3197" t="1594" r="4238" b="1795">surrounding</wd>

<space/>

<wd l="4286" t="1594" r="4810" b="1752">words</wd>

<space/>

<wd l="4872" t="1646" r="5131" b="1752">are</wd>

<space/>

<wd l="5179" t="1594" r="5578" b="1752">used</wd>

<space/>

<wd l="5630" t="1646" r="5798" b="1752">as</wd>

<space/>

</ln>

<ln l="1877" t="1862" r="2846" b="2021" baseLine="2016" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="1877" t="1862" r="2141" b="2021">the</wd>

<space/>

<wd l="2198" t="1862" r="2846" b="2021">feature.</wd>

</ln>

</para>

<para l="1618" t="2328" r="5808" b="3571" alignment="justified" li="432" spaceBefore="191" fli="-216" lsp="exactly" lspExact="271" language="en">

<bullet type="ordered" numChars="3">

</bullet>

<ln l="1618" t="2328" r="5808" b="2530" baseLine="2477">

<run bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="1618" t="2333" r="1877" b="2477">2.</wd>

<space/>

<wd l="1877" t="2333" r="2938" b="2530">Vocabulary</wd>

<space/>

<wd l="3010" t="2333" r="3542" b="2486">word:</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="3643" t="2328" r="4162" b="2530">Noisy</wd>

<space/>

<wd l="4224" t="2328" r="4675" b="2486">word</wd>

<space/>

<wd l="4738" t="2381" r="5035" b="2486">can</wd>

<space/>

<wd l="5098" t="2352" r="5381" b="2486">not</wd>

<space/>

<wd l="5438" t="2328" r="5645" b="2486">be</wd>

<space/>

<wd l="5712" t="2381" r="5808" b="2486">a</wd>

<space/>

</run>

</ln>

<ln l="1877" t="2597" r="5794" b="2798" baseLine="2750" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1877" t="2626" r="2218" b="2798">part</wd>

<space/>

<wd l="2294" t="2597" r="2486" b="2755">of</wd>

<space/>

<wd l="2544" t="2597" r="2976" b="2755">valid</wd>

<space/>

<wd l="3048" t="2597" r="4037" b="2798">vocabulary.</wd>

<space/>

<wd l="4171" t="2597" r="5074" b="2784">Therefore,</wd>

<space/>

<wd l="5170" t="2597" r="5371" b="2755">all</wd>

<space/>

<wd l="5458" t="2621" r="5794" b="2755">out-</wd>

</ln>

<ln l="1882" t="2870" r="5794" b="3072" baseLine="3019" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1882" t="2870" r="3086" b="3072">of-vocabulary</wd>

<space/>

<wd l="3154" t="2875" r="3739" b="3067">(OOV)</wd>

<space/>

<wd l="3811" t="2923" r="4070" b="3029">are</wd>

<space/>

<wd l="4123" t="2870" r="4392" b="3029">the</wd>

<space/>

<wd l="4445" t="2870" r="5203" b="3072">potential</wd>

<space/>

<wd l="5266" t="2870" r="5794" b="3029">candi-</wd>

</ln>

<ln l="1882" t="3139" r="5808" b="3298" baseLine="3293" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1882" t="3139" r="2318" b="3298">dates</wd>

<space/>

<wd l="2381" t="3139" r="2712" b="3298">that</wd>

<space/>

<wd l="2774" t="3139" r="3350" b="3298">should</wd>

<space/>

<wd l="3403" t="3139" r="3610" b="3298">be</wd>

<space/>

<wd l="3667" t="3139" r="4690" b="3298">normalized.</wd>

<space/>

<wd l="4776" t="3144" r="5059" b="3298">We</wd>

<space/>

<wd l="5122" t="3139" r="5650" b="3298">define</wd>

<space/>

<wd l="5712" t="3192" r="5808" b="3298">a</wd>

<space/>

</ln>

<ln l="1877" t="3413" r="5208" b="3571" baseLine="3562" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1877" t="3413" r="2482" b="3571">feature</wd>

<space/>

<wd l="2534" t="3413" r="2866" b="3571">that</wd>

<space/>

<wd l="2923" t="3413" r="3288" b="3571">fires</wd>

<space/>

<wd l="3350" t="3413" r="3494" b="3566">if</wd>

<space/>

<wd l="3538" t="3413" r="3802" b="3571">the</wd>

<space/>

<wd l="3864" t="3437" r="4478" b="3571">current</wd>

<space/>

<wd l="4531" t="3413" r="4666" b="3571">is</wd>

<space/>

<wd l="4733" t="3418" r="5208" b="3571">OOV.</wd>

</ln>

</para>

<para l="1618" t="3874" r="5818" b="4848" alignment="justified" li="432" spaceBefore="192" fli="-216" lsp="exactly" lspExact="271" language="en">

<bullet type="ordered" numChars="3">

</bullet>

<ln l="1618" t="3874" r="5803" b="4075" baseLine="4027">

<run bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-6"><wd l="1618" t="3878" r="1877" b="4027">3.</wd>

<space/>

<wd l="1877" t="3874" r="3269" b="4075">Part-of-Speech</wd>

<space/>

<wd l="3341" t="3874" r="3830" b="4066">(PoS)</wd>

<space/>

<wd l="3898" t="3874" r="5064" b="4032">information:</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-6"><wd l="5165" t="3878" r="5453" b="4032">We</wd>

<space/>

<wd l="5515" t="3926" r="5803" b="4032">use</wd>

<space/>

</run>

</ln>

<ln l="1882" t="4109" r="5803" b="4349" baseLine="4292">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-6"><wd l="1882" t="4152" r="2976" b="4306">CMU-Tweet</wd>

<space/>

<wd l="3048" t="4152" r="3389" b="4306">PoS</wd>

<space/>

<wd l="3475" t="4171" r="4027" b="4349">tagger</wd>

<space/>

</run>

<wd l="4104" t="4109" r="4171" b="4224" underlined="none" subsuperscript="superscript" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-6">5</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-6"><space/>

<wd l="4262" t="4147" r="4522" b="4306">for</wd>

<space/>

<wd l="4598" t="4147" r="5458" b="4349">extracting</wd>

<space/>

<wd l="5539" t="4147" r="5803" b="4306">the</wd>

<space/>

</run>

</ln>

<ln l="1877" t="4416" r="5818" b="4574" baseLine="4570" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-6">

<wd l="1877" t="4421" r="2218" b="4574">PoS</wd>

<space/>

<wd l="2285" t="4416" r="3360" b="4574">information.</wd>

<space/>

<wd l="3451" t="4416" r="3830" b="4574">This</wd>

<space/>

<wd l="3898" t="4416" r="4037" b="4574">is</wd>

<space/>

<wd l="4104" t="4416" r="4507" b="4574">used</wd>

<space/>

<wd l="4570" t="4469" r="4738" b="4574">as</wd>

<space/>

<wd l="4810" t="4469" r="4906" b="4574">a</wd>

<space/>

<wd l="4958" t="4416" r="5563" b="4574">feature</wd>

<space/>

<wd l="5630" t="4416" r="5818" b="4574">of</wd>

<space/>

</ln>

<ln l="1882" t="4694" r="2314" b="4848" baseLine="4838" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-6">

<wd l="1882" t="4694" r="2314" b="4848">CRF.</wd>

</ln>

</para>

<para l="1613" t="5150" r="5818" b="7210" alignment="justified" li="432" spaceBefore="194" fli="-216" lsp="exactly" lspExact="271" language="en">

<bullet type="ordered" numChars="3">

</bullet>

<ln l="1613" t="5150" r="5808" b="5352" baseLine="5304">

<run bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1613" t="5155" r="1877" b="5304">4.</wd>

<space/>

<wd l="1877" t="5155" r="2405" b="5309">Word</wd>

<space/>

<wd l="2482" t="5155" r="3120" b="5352">length:</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="3245" t="5155" r="3720" b="5309">From</wd>

<space/>

<wd l="3792" t="5150" r="4061" b="5309">the</wd>

<space/>

<wd l="4142" t="5150" r="4613" b="5352">given</wd>

<space/>

<wd l="4690" t="5150" r="5362" b="5352">training</wd>

<space/>

<wd l="5448" t="5150" r="5808" b="5309">data</wd>

<space/>

</run>

</ln>

<ln l="1877" t="5424" r="5803" b="5626" baseLine="5573" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1877" t="5477" r="2131" b="5582">we</wd>

<space/>

<wd l="2237" t="5424" r="3019" b="5582">observed</wd>

<space/>

<wd l="3115" t="5424" r="3446" b="5582">that</wd>

<space/>

<wd l="3542" t="5424" r="4013" b="5626">noisy</wd>

<space/>

<wd l="4118" t="5448" r="4517" b="5582">texts</wd>

<space/>

<wd l="4632" t="5477" r="4891" b="5582">are</wd>

<space/>

<wd l="4997" t="5424" r="5803" b="5626">generally</wd>

<space/>

</ln>

<ln l="1886" t="5693" r="5794" b="5894" baseLine="5846" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1886" t="5693" r="2486" b="5851">shorter</wd>

<space/>

<wd l="2568" t="5693" r="2736" b="5846">in</wd>

<space/>

<wd l="2822" t="5693" r="3494" b="5894">lengths.</wd>

<space/>

<wd l="3662" t="5698" r="3946" b="5851">We</wd>

<space/>

<wd l="4037" t="5693" r="4565" b="5851">define</wd>

<space/>

<wd l="4656" t="5746" r="4752" b="5851">a</wd>

<space/>

<wd l="4829" t="5693" r="5386" b="5894">binary</wd>

<space/>

<wd l="5472" t="5693" r="5794" b="5851">val-</wd>

</ln>

<ln l="1877" t="5966" r="5818" b="6168" baseLine="6115" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1877" t="5966" r="2194" b="6125">ued</wd>

<space/>

<wd l="2251" t="5966" r="2856" b="6125">feature</wd>

<space/>

<wd l="2918" t="5966" r="3250" b="6125">that</wd>

<space/>

<wd l="3307" t="5966" r="3446" b="6125">is</wd>

<space/>

<wd l="3523" t="5990" r="3758" b="6125">set</wd>

<space/>

<wd l="3816" t="5990" r="3984" b="6125">to</wd>

<space/>

<wd l="4046" t="5966" r="4435" b="6168">high</wd>

<space/>

<wd l="4498" t="5966" r="4642" b="6120">if</wd>

<space/>

<wd l="4690" t="5966" r="4954" b="6125">the</wd>

<space/>

<wd l="5016" t="5966" r="5563" b="6168">length</wd>

<space/>

<wd l="5630" t="5966" r="5818" b="6125">of</wd>

<space/>

</ln>

<ln l="1877" t="6235" r="5808" b="6437" baseLine="6389" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1877" t="6235" r="2141" b="6394">the</wd>

<space/>

<wd l="2208" t="6235" r="3038" b="6394">candidate</wd>

<space/>

<wd l="3101" t="6235" r="3581" b="6394">token</wd>

<space/>

<wd l="3648" t="6235" r="4320" b="6394">exceeds</wd>

<space/>

<wd l="4392" t="6288" r="4488" b="6394">a</wd>

<space/>

<wd l="4546" t="6235" r="5808" b="6437">predetermined</wd>

<space/>

</ln>

<ln l="1877" t="6509" r="5808" b="6667" baseLine="6658" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1877" t="6509" r="2731" b="6667">threshold.</wd>

<space/>

<wd l="2870" t="6514" r="3053" b="6662">In</wd>

<space/>

<wd l="3130" t="6562" r="3418" b="6667">our</wd>

<space/>

<wd l="3494" t="6562" r="3864" b="6667">case</wd>

<space/>

<wd l="3936" t="6562" r="4190" b="6667">we</wd>

<space/>

<wd l="4272" t="6562" r="4906" b="6667">assume</wd>

<space/>

<wd l="4982" t="6509" r="5246" b="6667">the</wd>

<space/>

<wd l="5323" t="6509" r="5808" b="6667">token</wd>

<space/>

</ln>

<ln l="1877" t="6778" r="5803" b="6979" baseLine="6926" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1877" t="6802" r="2045" b="6936">to</wd>

<space/>

<wd l="2122" t="6778" r="2323" b="6936">be</wd>

<space/>

<wd l="2405" t="6830" r="2501" b="6936">a</wd>

<space/>

<wd l="2573" t="6778" r="3043" b="6979">noisy</wd>

<space/>

<wd l="3120" t="6802" r="3446" b="6936">text</wd>

<space/>

<wd l="3518" t="6778" r="3662" b="6931">if</wd>

<space/>

<wd l="3725" t="6778" r="3922" b="6936">its</wd>

<space/>

<wd l="4003" t="6778" r="4550" b="6979">length</wd>

<space/>

<wd l="4627" t="6778" r="4762" b="6936">is</wd>

<space/>

<wd l="4843" t="6778" r="5165" b="6936">less</wd>

<space/>

<wd l="5246" t="6778" r="5626" b="6936">than</wd>

<space/>

<wd l="5698" t="6782" r="5803" b="6931">4</wd>

<space/>

</ln>

<ln l="1882" t="7051" r="2803" b="7210" baseLine="7200" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1882" t="7051" r="2803" b="7210">characters.</wd>

</ln>

</para>

<para l="1618" t="7512" r="5818" b="8213" alignment="justified" li="432" spaceBefore="190" fli="-216" lsp="exactly" lspExact="271" language="en">

<bullet type="ordered" numChars="3">

</bullet>

<ln l="1618" t="7512" r="5818" b="7714" baseLine="7666">

<run bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1618" t="7512" r="1882" b="7666">5.</wd>

<space/>

<wd l="1882" t="7512" r="2424" b="7670">Suffix</wd>

<space/>

<wd l="2506" t="7517" r="2851" b="7670">and</wd>

<space/>

<wd l="2933" t="7512" r="3542" b="7670">Prefix:</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3682" t="7512" r="4373" b="7670">Suffixes</wd>

<space/>

<wd l="4469" t="7512" r="4781" b="7670">and</wd>

<space/>

<wd l="4858" t="7512" r="5534" b="7714">prefixes</wd>

<space/>

<wd l="5630" t="7512" r="5818" b="7670">of</wd>

<space/>

</run>

</ln>

<ln l="1877" t="7786" r="5808" b="7987" baseLine="7934" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1877" t="7786" r="2424" b="7987">length</wd>

<space/>

<wd l="2491" t="7810" r="2875" b="7987">upto</wd>

<space/>

<wd l="2952" t="7790" r="3058" b="7939">4</wd>

<space/>

<wd l="3134" t="7786" r="4003" b="7944">characters</wd>

<space/>

<wd l="4090" t="7786" r="4277" b="7944">of</wd>

<space/>

<wd l="4334" t="7786" r="4598" b="7944">the</wd>

<space/>

<wd l="4675" t="7810" r="5290" b="7944">current</wd>

<space/>

<wd l="5362" t="7786" r="5808" b="7944">word</wd>

<space/>

</ln>

<ln l="1882" t="8054" r="3941" b="8213" baseLine="8203" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1882" t="8107" r="2141" b="8213">are</wd>

<space/>

<wd l="2198" t="8054" r="2597" b="8213">used</wd>

<space/>

<wd l="2654" t="8107" r="2827" b="8213">as</wd>

<space/>

<wd l="2890" t="8054" r="3154" b="8213">the</wd>

<space/>

<wd l="3211" t="8054" r="3941" b="8213">features.</wd>

</ln>

</para>

<para l="1618" t="8520" r="5808" b="9802" alignment="justified" li="432" spaceBefore="196" fli="-216" lsp="exactly" lspExact="271" language="en">

<bullet type="ordered" numChars="3">

</bullet>

<ln l="1618" t="8520" r="5803" b="8722" baseLine="8669">

<run bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1618" t="8520" r="1882" b="8669">6.</wd>

<space/>

<wd l="1882" t="8520" r="2333" b="8722">Only</wd>

<space/>

<wd l="2410" t="8520" r="2885" b="8722">digit:</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2990" t="8520" r="3370" b="8678">This</wd>

<space/>

<wd l="3442" t="8520" r="4046" b="8678">feature</wd>

<space/>

<wd l="4118" t="8520" r="4699" b="8678">checks</wd>

<space/>

<wd l="4771" t="8520" r="5477" b="8678">whether</wd>

<space/>

<wd l="5539" t="8520" r="5803" b="8678">the</wd>

<space/>

</run>

</ln>

<ln l="1882" t="8789" r="5808" b="8990" baseLine="8938" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1882" t="8813" r="2496" b="8947">current</wd>

<space/>

<wd l="2573" t="8789" r="3058" b="8947">token</wd>

<space/>

<wd l="3139" t="8789" r="3274" b="8947">is</wd>

<space/>

<wd l="3370" t="8789" r="4243" b="8990">consisting</wd>

<space/>

<wd l="4334" t="8789" r="4522" b="8947">of</wd>

<space/>

<wd l="4594" t="8789" r="4973" b="8990">only</wd>

<space/>

<wd l="5064" t="8789" r="5534" b="8990">digits</wd>

<space/>

<wd l="5630" t="8842" r="5808" b="8947">or</wd>

<space/>

</ln>

<ln l="1877" t="9062" r="5794" b="9264" baseLine="9211" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1877" t="9086" r="2198" b="9221">not.</wd>

<space/>

<wd l="2347" t="9062" r="2688" b="9221">The</wd>

<space/>

<wd l="2765" t="9062" r="3211" b="9221">word</wd>

<space/>

<wd l="3288" t="9062" r="3571" b="9221">has</wd>

<space/>

<wd l="3662" t="9115" r="3758" b="9221">a</wd>

<space/>

<wd l="3830" t="9062" r="4152" b="9221">low</wd>

<space/>

<wd l="4234" t="9062" r="5184" b="9264">probability</wd>

<space/>

<wd l="5270" t="9062" r="5462" b="9221">of</wd>

<space/>

<wd l="5525" t="9062" r="5794" b="9221">be-</wd>

</ln>

<ln l="1877" t="9331" r="5803" b="9533" baseLine="9480" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1877" t="9331" r="2150" b="9533">ing</wd>

<space/>

<wd l="2227" t="9331" r="2698" b="9533">noisy</wd>

<space/>

<wd l="2774" t="9331" r="2918" b="9485">if</wd>

<space/>

<wd l="2981" t="9331" r="3101" b="9490">it</wd>

<space/>

<wd l="3178" t="9331" r="3893" b="9490">contains</wd>

<space/>

<wd l="3979" t="9331" r="4358" b="9533">only</wd>

<space/>

<wd l="4435" t="9331" r="4699" b="9490">the</wd>

<space/>

<wd l="4776" t="9331" r="5304" b="9533">digits.</wd>

<space/>

<wd l="5434" t="9336" r="5803" b="9490">Few</wd>

<space/>

</ln>

<ln l="1882" t="9600" r="4574" b="9802" baseLine="9754">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1882" t="9600" r="2798" b="9802">exceptions</wd>

<space/>

<wd l="2866" t="9653" r="3125" b="9758">are</wd>

<space/>

</run>

<wd l="3182" t="9605" r="3648" b="9797"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">2</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(to),</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="3715" t="9600" r="4214" b="9797"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">4</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(for)</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="4282" t="9624" r="4574" b="9758">etc.</wd>

</run>

</ln>

</para>

<para l="1613" t="10066" r="5808" b="11352" alignment="justified" li="432" spaceBefore="190" fli="-216" lsp="exactly" lspExact="271" language="en">

<bullet type="ordered" numChars="3">

</bullet>

<ln l="1613" t="10066" r="5808" b="10267" baseLine="10214">

<run bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1613" t="10066" r="1877" b="10214">7.</wd>

<space/>

<wd l="1877" t="10066" r="2966" b="10267">AlphaDigit:</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3082" t="10070" r="3350" b="10219">An</wd>

<space/>

<wd l="3422" t="10066" r="4603" b="10267">alphanumeric</wd>

<space/>

<wd l="4680" t="10066" r="5160" b="10224">token</wd>

<space/>

<wd l="5232" t="10066" r="5635" b="10224">have</wd>

<space/>

<wd l="5712" t="10118" r="5808" b="10224">a</wd>

<space/>

</run>

</ln>

<ln l="1877" t="10339" r="5794" b="10541" baseLine="10488" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1877" t="10339" r="2266" b="10541">high</wd>

<space/>

<wd l="2333" t="10339" r="3283" b="10541">probability</wd>

<space/>

<wd l="3360" t="10339" r="3552" b="10498">of</wd>

<space/>

<wd l="3605" t="10339" r="4085" b="10541">being</wd>

<space/>

<wd l="4162" t="10392" r="4258" b="10498">a</wd>

<space/>

<wd l="4320" t="10339" r="4786" b="10541">noisy</wd>

<space/>

<wd l="4858" t="10363" r="5222" b="10498">text.</wd>

<space/>

<wd l="5338" t="10344" r="5496" b="10493">A</wd>

<space/>

<wd l="5563" t="10339" r="5794" b="10498">bi-</wd>

</ln>

<ln l="1877" t="10608" r="5794" b="10810" baseLine="10757" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1877" t="10661" r="2261" b="10810">nary</wd>

<space/>

<wd l="2314" t="10608" r="2894" b="10766">valued</wd>

<space/>

<wd l="2942" t="10608" r="3547" b="10766">feature</wd>

<space/>

<wd l="3600" t="10608" r="3734" b="10766">is</wd>

<space/>

<wd l="3797" t="10608" r="4152" b="10766">thus</wd>

<space/>

<wd l="4214" t="10608" r="4853" b="10766">defined</wd>

<space/>

<wd l="4906" t="10608" r="5074" b="10762">in</wd>

<space/>

<wd l="5126" t="10608" r="5390" b="10766">the</wd>

<space/>

<wd l="5443" t="10661" r="5794" b="10810">pro-</wd>

</ln>

<ln l="1877" t="10877" r="5794" b="11078" baseLine="11030" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1877" t="10877" r="2386" b="11078">posed</wd>

<space/>

<wd l="2443" t="10877" r="2890" b="11035">work</wd>

<space/>

<wd l="2947" t="10877" r="3485" b="11035">which</wd>

<space/>

<wd l="3547" t="10877" r="3907" b="11035">fires</wd>

<space/>

<wd l="3974" t="10877" r="4450" b="11035">when</wd>

<space/>

<wd l="4507" t="10877" r="4771" b="11035">the</wd>

<space/>

<wd l="4829" t="10877" r="5314" b="11035">token</wd>

<space/>

<wd l="5371" t="10877" r="5510" b="11035">is</wd>

<space/>

<wd l="5582" t="10877" r="5794" b="11035">al-</wd>

</ln>

<ln l="1877" t="11150" r="2952" b="11352" baseLine="11299" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1877" t="11150" r="2952" b="11352">phanumeric.</wd>

</ln>

</para>

<para l="1622" t="11611" r="5798" b="12629" alignment="justified" li="432" spaceBefore="193" fli="-216" lsp="exactly" lspExact="271" language="en">

<bullet type="ordered" numChars="3">

</bullet>

<ln l="1622" t="11611" r="5798" b="11770" baseLine="11765">

<run bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="1622" t="11611" r="1886" b="11765">8.</wd>

<space/>

<wd l="1886" t="11611" r="3000" b="11770">Consecutive</wd>

<space/>

<wd l="3091" t="11616" r="4123" b="11770">characters:</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="4267" t="11611" r="4646" b="11770">This</wd>

<space/>

<wd l="4738" t="11611" r="5342" b="11770">feature</wd>

<space/>

<wd l="5434" t="11611" r="5798" b="11770">fires</wd>

<space/>

</run>

</ln>

<ln l="1877" t="11885" r="5794" b="12043" baseLine="12034" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1877" t="11885" r="2352" b="12043">when</wd>

<space/>

<wd l="2405" t="11938" r="2501" b="12043">a</wd>

<space/>

<wd l="2549" t="11885" r="3029" b="12043">token</wd>

<space/>

<wd l="3086" t="11885" r="3763" b="12043">consists</wd>

<space/>

<wd l="3826" t="11885" r="4018" b="12043">of</wd>

<space/>

<wd l="4056" t="11938" r="4502" b="12043">more</wd>

<space/>

<wd l="4555" t="11885" r="4925" b="12043">than</wd>

<space/>

<wd l="4982" t="11890" r="5083" b="12038">2</wd>

<space/>

<wd l="5146" t="11938" r="5794" b="12043">consec-</wd>

</ln>

<ln l="1877" t="12154" r="5798" b="12355" baseLine="12307" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1877" t="12154" r="2304" b="12312">utive</wd>

<space/>

<wd l="2376" t="12154" r="3250" b="12312">characters</wd>

<space/>

<wd l="3326" t="12154" r="3466" b="12312">is</wd>

<space/>

<wd l="3542" t="12154" r="4094" b="12312">found.</wd>

<space/>

<wd l="4214" t="12154" r="4594" b="12312">This</wd>

<space/>

<wd l="4670" t="12154" r="5275" b="12312">feature</wd>

<space/>

<wd l="5347" t="12154" r="5798" b="12355">helps</wd>

<space/>

</ln>

<ln l="1877" t="12427" r="5760" b="12629" baseLine="12576" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1877" t="12427" r="2050" b="12581">in</wd>

<space/>

<wd l="2102" t="12427" r="3053" b="12629">identifying</wd>

<space/>

<wd l="3115" t="12427" r="3379" b="12586">the</wd>

<space/>

<wd l="3442" t="12427" r="5131" b="12629">stretched/elongated</wd>

<space/>

<wd l="5184" t="12427" r="5760" b="12586">words.</wd>

</ln>

</para>

<para l="1618" t="12888" r="5808" b="14174" alignment="justified" li="432" spaceBefore="195" fli="-216" lsp="exactly" lspExact="271" language="en">

<bullet type="ordered" numChars="3">

</bullet>

<ln l="1618" t="12888" r="5794" b="13090" baseLine="13042">

<run bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1618" t="12888" r="1886" b="13042">9.</wd>

<space/>

<wd l="1886" t="12888" r="2731" b="13090">Compact</wd>

<space/>

<wd l="2803" t="12893" r="3278" b="13046">word</wd>

<space/>

<wd l="3350" t="12888" r="3864" b="13046">form:</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="3979" t="12888" r="4997" b="13090">Apostrophe</wd>

<space/>

<wd l="5069" t="12888" r="5515" b="13046">mark</wd>

<space/>

<wd l="5597" t="12893" r="5794" b="13085">(’)</wd>

<space/>

</run>

</ln>

<ln l="1877" t="13162" r="5808" b="13320" baseLine="13310" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1877" t="13162" r="2016" b="13320">is</wd>

<space/>

<wd l="2122" t="13162" r="2520" b="13320">used</wd>

<space/>

<wd l="2621" t="13186" r="2784" b="13320">to</wd>

<space/>

<wd l="2890" t="13162" r="3576" b="13320">indicate</wd>

<space/>

<wd l="3677" t="13162" r="3941" b="13320">the</wd>

<space/>

<wd l="4046" t="13162" r="4834" b="13320">omission</wd>

<space/>

<wd l="4934" t="13162" r="5126" b="13320">of</wd>

<space/>

<wd l="5213" t="13214" r="5525" b="13320">one</wd>

<space/>

<wd l="5630" t="13214" r="5808" b="13320">or</wd>

<space/>

</ln>

<ln l="1877" t="13430" r="5798" b="13632" baseLine="13584">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1877" t="13483" r="2323" b="13589">more</wd>

<space/>

<wd l="2410" t="13430" r="2938" b="13589">letters</wd>

<space/>

<wd l="3034" t="13430" r="3461" b="13589">from</wd>

<space/>

<wd l="3547" t="13483" r="3643" b="13589">a</wd>

<space/>

<wd l="3725" t="13430" r="4171" b="13589">word</wd>

<space/>

<wd l="4267" t="13435" r="4637" b="13632">(e.g.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="4819" t="13435" r="5136" b="13613">i’m,</wd>

<space/>

<wd l="5242" t="13435" r="5798" b="13632">you’re</wd>

<space/>

</run>

</ln>

<ln l="1882" t="13704" r="5794" b="13906" baseLine="13853" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1882" t="13709" r="2299" b="13901">etc.).</wd>

<space/>

<wd l="2381" t="13709" r="2539" b="13858">A</wd>

<space/>

<wd l="2597" t="13704" r="3149" b="13906">binary</wd>

<space/>

<wd l="3206" t="13704" r="3811" b="13862">feature</wd>

<space/>

<wd l="3869" t="13704" r="4008" b="13862">is</wd>

<space/>

<wd l="4075" t="13704" r="4714" b="13862">defined</wd>

<space/>

<wd l="4766" t="13704" r="5304" b="13862">which</wd>

<space/>

<wd l="5357" t="13704" r="5794" b="13862">iden-</wd>

</ln>

<ln l="1877" t="13973" r="5789" b="14174" baseLine="14126" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1877" t="13973" r="2294" b="14131">tifies</wd>

<space/>

<wd l="2357" t="13973" r="2621" b="14131">the</wd>

<space/>

<wd l="2678" t="13973" r="3350" b="14174">missing</wd>

<space/>

<wd l="3413" t="13973" r="4368" b="14174">apostrophe</wd>

<space/>

<wd l="4421" t="13973" r="4872" b="14131">mark</wd>

<space/>

<wd l="4925" t="13973" r="5098" b="14126">in</wd>

<space/>

<wd l="5155" t="14026" r="5251" b="14131">a</wd>

<space/>

<wd l="5299" t="13973" r="5789" b="14131">word.</wd>

</ln>

</para>

<para l="1526" t="14438" r="5808" b="14909" alignment="justified" li="432" spaceBefore="193" spaceAfter="114" fli="-216" lsp="exactly" lspExact="271" language="en">

<bullet type="ordered" numChars="4">

</bullet>

<ln l="1526" t="14438" r="5808" b="14640" baseLine="14587">

<run bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1526" t="14443" r="1877" b="14587">10.</wd>

<space/>

<wd l="1877" t="14443" r="2578" b="14597">Present</wd>

<space/>

<wd l="2702" t="14438" r="3600" b="14640">participle</wd>

<space/>

</run>

<wd l="3739" t="14438" r="4229" b="14630"><run bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1300" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">a.k.a</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1300" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><space/>

</run>

<run bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4368" t="14438" r="5251" b="14640">ing-form)</wd>

<space/>

<wd l="5390" t="14438" r="5582" b="14597">of</wd>

<space/>

<wd l="5702" t="14486" r="5808" b="14597">a</wd>

<space/>

</run>

</ln>

<ln l="1877" t="14707" r="5803" b="14909" baseLine="14861">

<run bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1877" t="14712" r="2357" b="14866">verb:</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2467" t="14712" r="2942" b="14866">From</wd>

<space/>

<wd l="3010" t="14707" r="3278" b="14866">the</wd>

<space/>

<wd l="3350" t="14707" r="4042" b="14909">analysis</wd>

<space/>

<wd l="4123" t="14707" r="4315" b="14866">of</wd>

<space/>

<wd l="4368" t="14707" r="5045" b="14909">training</wd>

<space/>

<wd l="5122" t="14707" r="5486" b="14866">data</wd>

<space/>

<wd l="5549" t="14760" r="5803" b="14866">we</wd>

</run>

</ln>

</para>

<rulerline l="1440" t="15048" r="2640" b="15048" type="single" width="10" color="000000"/>

<para l="1694" t="15096" r="4570" b="15302" alignment="left" li="216" spaceBefore="54" spaceAfter="12" lsp="exactly" lspExact="213" language="en">

<ln l="1694" t="15096" r="4570" b="15302" baseLine="15256">

<wd l="1694" t="15096" r="4570" b="15302"><run underlined="none" subsuperscript="superscript" fontSize="650" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">5</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">http://www.ark.cs.cmu.edu/TweetNLP/</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

</column>

<column l="6144" t="1256" r="11342" b="15331">

<para l="6581" t="1320" r="10526" b="2837" alignment="justified" li="504" ri="792" spaceBefore="3" lsp="exactly" lspExact="271" language="en">

<ln l="6586" t="1320" r="10488" b="1522" baseLine="1474" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6586" t="1320" r="7368" b="1478">observed</wd>

<space/>

<wd l="7440" t="1320" r="7771" b="1478">that</wd>

<space/>

<wd l="7843" t="1320" r="8424" b="1522">people</wd>

<space/>

<wd l="8501" t="1320" r="8952" b="1478">tends</wd>

<space/>

<wd l="9034" t="1344" r="9202" b="1478">to</wd>

<space/>

<wd l="9288" t="1320" r="9638" b="1522">skip</wd>

<space/>

<wd l="9744" t="1320" r="9902" b="1474">‘i’</wd>

<space/>

<wd l="10003" t="1373" r="10186" b="1478">or</wd>

<space/>

<wd l="10282" t="1325" r="10488" b="1522">‘g’</wd>

<space/>

</ln>

<ln l="6581" t="1594" r="10526" b="1795" baseLine="1742">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6581" t="1594" r="7008" b="1752">from</wd>

<space/>

<wd l="7075" t="1594" r="7339" b="1752">the</wd>

<space/>

<wd l="7411" t="1618" r="8040" b="1795">present</wd>

<space/>

<wd l="8107" t="1594" r="8976" b="1795">participle,</wd>

<space/>

<wd l="9058" t="1594" r="9312" b="1752">i.e.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="9442" t="1603" r="9706" b="1795">ing</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="9778" t="1594" r="10248" b="1781">form,</wd>

<space/>

<wd l="10334" t="1594" r="10526" b="1752">of</wd>

<space/>

</run>

</ln>

<ln l="6586" t="1862" r="10512" b="2064" baseLine="2016">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6586" t="1915" r="6682" b="2021">a</wd>

<space/>

<wd l="6749" t="1862" r="7171" b="2021">verb.</wd>

<space/>

<wd l="7301" t="1867" r="7608" b="2021">For</wd>

<space/>

<wd l="7680" t="1862" r="8453" b="2064">example,</wd>

<space/>

<wd l="8539" t="1862" r="8909" b="2064">they</wd>

<space/>

<wd l="8986" t="1915" r="9274" b="2021">use</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="9350" t="1872" r="9734" b="2064">goin</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="9806" t="1862" r="9979" b="2016">in</wd>

<space/>

<wd l="10051" t="1862" r="10512" b="2064">place</wd>

<space/>

</run>

</ln>

<ln l="6586" t="2136" r="10507" b="2338" baseLine="2285">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6586" t="2136" r="6778" b="2294">of</wd>

<space/>

</run>

<wd l="6821" t="2146" r="7363" b="2338"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">going</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="7450" t="2136" r="7882" b="2294">Thus</wd>

<space/>

<wd l="7949" t="2189" r="8045" b="2294">a</wd>

<space/>

<wd l="8102" t="2136" r="8707" b="2294">feature</wd>

<space/>

<wd l="8765" t="2136" r="8899" b="2294">is</wd>

<space/>

<wd l="8971" t="2136" r="9610" b="2294">defined</wd>

<space/>

<wd l="9672" t="2136" r="9984" b="2294">and</wd>

<space/>

<wd l="10051" t="2160" r="10286" b="2294">set</wd>

<space/>

<wd l="10339" t="2160" r="10507" b="2294">to</wd>

<space/>

</run>

</ln>

<ln l="6605" t="2405" r="10502" b="2606" baseLine="2558" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6605" t="2410" r="6922" b="2563">‘on’</wd>

<space/>

<wd l="7022" t="2405" r="7171" b="2558">if</wd>

<space/>

<wd l="7238" t="2458" r="7334" b="2563">a</wd>

<space/>

<wd l="7406" t="2405" r="7891" b="2563">token</wd>

<space/>

<wd l="7963" t="2405" r="8102" b="2563">is</wd>

<space/>

<wd l="8189" t="2405" r="8698" b="2563">found</wd>

<space/>

<wd l="8774" t="2405" r="9163" b="2563">with</wd>

<space/>

<wd l="9235" t="2405" r="9504" b="2563">the</wd>

<space/>

<wd l="9586" t="2405" r="10094" b="2563">above</wd>

<space/>

<wd l="10171" t="2429" r="10502" b="2606">pat-</wd>

</ln>

<ln l="6581" t="2683" r="6965" b="2837" baseLine="2827" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6581" t="2702" r="6965" b="2837">tern.</wd>

</ln>

</para>

<para l="6235" t="3216" r="10512" b="4190" alignment="justified" li="504" ri="792" spaceBefore="270" fli="-432" lsp="exactly" lspExact="271" language="en">

<bullet type="ordered" numChars="4">

</bullet>

<ln l="6235" t="3216" r="10512" b="3418" baseLine="3370">

<run bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6235" t="3216" r="6586" b="3370">11.</wd>

<space/>

<wd l="6586" t="3216" r="7152" b="3418">Single</wd>

<space/>

<wd l="7200" t="3221" r="8150" b="3374">character:</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="8227" t="3216" r="8606" b="3374">This</wd>

<space/>

<wd l="8659" t="3216" r="9264" b="3374">feature</wd>

<space/>

<wd l="9312" t="3216" r="9677" b="3374">fires</wd>

<space/>

<wd l="9730" t="3216" r="10200" b="3374">when</wd>

<space/>

<wd l="10243" t="3216" r="10512" b="3374">the</wd>

<space/>

</run>

</ln>

<ln l="6581" t="3490" r="10512" b="3691" baseLine="3638" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6581" t="3490" r="7066" b="3648">token</wd>

<space/>

<wd l="7123" t="3490" r="7800" b="3648">consists</wd>

<space/>

<wd l="7867" t="3490" r="8059" b="3648">of</wd>

<space/>

<wd l="8107" t="3542" r="8203" b="3648">a</wd>

<space/>

<wd l="8261" t="3490" r="8770" b="3691">single</wd>

<space/>

<wd l="8832" t="3490" r="9629" b="3648">character</wd>

<space/>

<wd l="9686" t="3490" r="10066" b="3691">only</wd>

<space/>

<wd l="10123" t="3490" r="10512" b="3648">with</wd>

<space/>

</ln>

<ln l="6581" t="3758" r="10512" b="3960" baseLine="3912" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6581" t="3758" r="6850" b="3917">the</wd>

<space/>

<wd l="6936" t="3758" r="7776" b="3960">exception</wd>

<space/>

<wd l="7858" t="3758" r="8050" b="3917">of</wd>

<space/>

<wd l="8117" t="3782" r="8438" b="3917">two</wd>

<space/>

<wd l="8525" t="3758" r="9398" b="3917">characters</wd>

<space/>

<wd l="9485" t="3758" r="9739" b="3917">i.e.</wd>

<space/>

<wd l="9922" t="3763" r="10090" b="3912">‘I’</wd>

<space/>

<wd l="10200" t="3758" r="10512" b="3917">and</wd>

<space/>

</ln>

<ln l="6605" t="4037" r="6869" b="4190" baseLine="4181" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6605" t="4037" r="6869" b="4190">‘a’.</wd>

</ln>

</para>

<para l="6235" t="4570" r="10512" b="5813" alignment="justified" li="504" ri="792" spaceBefore="267" fli="-432" lsp="exactly" lspExact="271" language="en">

<bullet type="ordered" numChars="4">

</bullet>

<ln l="6235" t="4570" r="10502" b="4771" baseLine="4723">

<run bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4"><wd l="6235" t="4574" r="6581" b="4723">12.</wd>

<space/>

<wd l="6581" t="4574" r="7066" b="4728">Hash</wd>

<space/>

<wd l="7123" t="4584" r="7411" b="4771">tag</wd>

<space/>

<wd l="7478" t="4570" r="7642" b="4728">&amp;</wd>

<space/>

<wd l="7709" t="4574" r="8707" b="4728">Username:</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4"><wd l="8794" t="4570" r="9245" b="4728">Hash</wd>

<space/>

<wd l="9298" t="4594" r="9643" b="4771">tags</wd>

<space/>

<wd l="9710" t="4570" r="10022" b="4728">and</wd>

<space/>

<wd l="10080" t="4622" r="10502" b="4728">user-</wd>

</run>

</ln>

<ln l="6581" t="4843" r="10502" b="5030" baseLine="4992" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6581" t="4896" r="7133" b="5002">names</wd>

<space/>

<wd l="7195" t="4843" r="7363" b="4997">in</wd>

<space/>

<wd l="7416" t="4867" r="8016" b="5030">tweets,</wd>

<space/>

<wd l="8083" t="4843" r="8616" b="5002">which</wd>

<space/>

<wd l="8678" t="4867" r="9125" b="5002">starts</wd>

<space/>

<wd l="9187" t="4843" r="9576" b="5002">with</wd>

<space/>

<wd l="9629" t="4848" r="9739" b="4997">#</wd>

<space/>

<wd l="9797" t="4848" r="9960" b="5002">&amp;</wd>

<space/>

<wd l="10037" t="4848" r="10190" b="5002">@</wd>

<space/>

<wd l="10267" t="4896" r="10502" b="5002">re-</wd>

</ln>

<ln l="6590" t="5112" r="10512" b="5314" baseLine="5261" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6590" t="5112" r="7488" b="5314">spectively,</wd>

<space/>

<wd l="7555" t="5165" r="7819" b="5270">are</wd>

<space/>

<wd l="7872" t="5136" r="8150" b="5270">not</wd>

<space/>

<wd l="8208" t="5112" r="9149" b="5270">considered</wd>

<space/>

<wd l="9206" t="5165" r="9374" b="5270">as</wd>

<space/>

<wd l="9437" t="5112" r="9907" b="5314">noisy</wd>

<space/>

<wd l="9965" t="5136" r="10291" b="5270">text</wd>

<space/>

<wd l="10339" t="5112" r="10512" b="5266">in</wd>

<space/>

</ln>

<ln l="6581" t="5386" r="10512" b="5587" baseLine="5534" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6581" t="5386" r="6850" b="5544">the</wd>

<space/>

<wd l="6902" t="5386" r="7579" b="5587">training</wd>

<space/>

<wd l="7642" t="5386" r="8045" b="5544">data.</wd>

<space/>

<wd l="8122" t="5386" r="8981" b="5544">Therefore</wd>

<space/>

<wd l="9038" t="5386" r="9346" b="5544">this</wd>

<space/>

<wd l="9408" t="5386" r="10013" b="5544">feature</wd>

<space/>

<wd l="10070" t="5386" r="10205" b="5544">is</wd>

<space/>

<wd l="10277" t="5410" r="10512" b="5544">set</wd>

<space/>

</ln>

<ln l="6581" t="5654" r="9749" b="5813" baseLine="5803" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6581" t="5678" r="6749" b="5813">to</wd>

<space/>

<wd l="6806" t="5654" r="7214" b="5813">false</wd>

<space/>

<wd l="7272" t="5654" r="7416" b="5808">if</wd>

<space/>

<wd l="7464" t="5707" r="7560" b="5813">a</wd>

<space/>

<wd l="7608" t="5654" r="8093" b="5813">token</wd>

<space/>

<wd l="8155" t="5678" r="8602" b="5813">starts</wd>

<space/>

<wd l="8664" t="5654" r="9053" b="5813">with</wd>

<space/>

<wd l="9106" t="5659" r="9216" b="5808">#</wd>

<space/>

<wd l="9274" t="5707" r="9456" b="5813">or</wd>

<space/>

<wd l="9528" t="5659" r="9749" b="5813">@.</wd>

</ln>

</para>

<para l="6144" t="6226" r="9701" b="6384" alignment="left" li="72" spaceBefore="321" lsp="exactly" lspExact="250" language="en">

<ln l="6144" t="6226" r="9701" b="6384" baseLine="6374" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6144" t="6226" r="6413" b="6384">3.2</wd>

<space/>

<wd l="6638" t="6226" r="7498" b="6384">Heuristic</wd>

<space/>

<wd l="7555" t="6230" r="8011" b="6384">rules</wd>

<space/>

<wd l="8069" t="6226" r="8338" b="6384">for</wd>

<space/>

<wd l="8395" t="6226" r="9701" b="6384">normalization</wd>

</ln>

</para>

<para l="6144" t="6619" r="10526" b="8174" alignment="justified" li="72" ri="792" spaceBefore="126" lsp="exactly" lspExact="271" language="en">

<ln l="6149" t="6619" r="10498" b="6821" baseLine="6773" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6149" t="6624" r="6605" b="6778">Once</wd>

<space/>

<wd l="6662" t="6619" r="6926" b="6778">the</wd>

<space/>

<wd l="6984" t="6619" r="7454" b="6821">noisy</wd>

<space/>

<wd l="7512" t="6643" r="7838" b="6778">text</wd>

<space/>

<wd l="7891" t="6672" r="8218" b="6778">was</wd>

<space/>

<wd l="8285" t="6619" r="9110" b="6778">identified</wd>

<space/>

<wd l="9163" t="6619" r="9336" b="6773">in</wd>

<space/>

<wd l="9389" t="6619" r="9653" b="6778">the</wd>

<space/>

<wd l="9715" t="6619" r="10051" b="6778">first</wd>

<space/>

<wd l="10114" t="6643" r="10498" b="6821">step,</wd>

<space/>

</ln>

<ln l="6144" t="6893" r="10512" b="7051" baseLine="7042" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6144" t="6946" r="6398" b="7051">we</wd>

<space/>

<wd l="6470" t="6893" r="7018" b="7051">devise</wd>

<space/>

<wd l="7090" t="6946" r="7186" b="7051">a</wd>

<space/>

<wd l="7258" t="6917" r="7493" b="7051">set</wd>

<space/>

<wd l="7565" t="6893" r="7752" b="7051">of</wd>

<space/>

<wd l="7805" t="6893" r="8222" b="7051">rules</wd>

<space/>

<wd l="8299" t="6893" r="8558" b="7051">for</wd>

<space/>

<wd l="8621" t="6893" r="9874" b="7051">normalization.</wd>

<space/>

<wd l="9989" t="6893" r="10512" b="7051">These</wd>

<space/>

</ln>

<ln l="6144" t="7162" r="10502" b="7320" baseLine="7310" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6144" t="7162" r="6562" b="7320">rules</wd>

<space/>

<wd l="6629" t="7214" r="6893" b="7320">are</wd>

<space/>

<wd l="6946" t="7162" r="7694" b="7320">heuristic</wd>

<space/>

<wd l="7757" t="7162" r="7925" b="7315">in</wd>

<space/>

<wd l="7982" t="7186" r="8525" b="7320">nature</wd>

<space/>

<wd l="8587" t="7162" r="8899" b="7320">and</wd>

<space/>

<wd l="8952" t="7162" r="9451" b="7320">based</wd>

<space/>

<wd l="9509" t="7214" r="9725" b="7320">on</wd>

<space/>

<wd l="9778" t="7162" r="10046" b="7320">the</wd>

<space/>

<wd l="10099" t="7162" r="10502" b="7320">facts</wd>

<space/>

</ln>

<ln l="6149" t="7435" r="10526" b="7637" baseLine="7584" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6149" t="7440" r="6312" b="7594">&amp;</wd>

<space/>

<wd l="6365" t="7435" r="7056" b="7637">analysis</wd>

<space/>

<wd l="7118" t="7488" r="7330" b="7594">on</wd>

<space/>

<wd l="7378" t="7435" r="7642" b="7594">the</wd>

<space/>

<wd l="7690" t="7435" r="8366" b="7637">training</wd>

<space/>

<wd l="8419" t="7435" r="8822" b="7594">data.</wd>

<space/>

<wd l="8899" t="7435" r="9461" b="7594">Below</wd>

<space/>

<wd l="9509" t="7435" r="9648" b="7594">is</wd>

<space/>

<wd l="9701" t="7435" r="9970" b="7594">the</wd>

<space/>

<wd l="10018" t="7435" r="10286" b="7594">list</wd>

<space/>

<wd l="10334" t="7435" r="10526" b="7594">of</wd>

<space/>

</ln>

<ln l="6144" t="7704" r="10512" b="7906" baseLine="7853" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6144" t="7704" r="6562" b="7862">rules</wd>

<space/>

<wd l="6648" t="7704" r="7790" b="7906">implemented</wd>

<space/>

<wd l="7877" t="7704" r="8726" b="7906">according</wd>

<space/>

<wd l="8813" t="7728" r="8976" b="7862">to</wd>

<space/>

<wd l="9062" t="7704" r="9466" b="7862">their</wd>

<space/>

<wd l="9547" t="7704" r="10512" b="7906">application</wd>

<space/>

</ln>

<ln l="6144" t="7973" r="8035" b="8174" baseLine="8126" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6144" t="7973" r="6317" b="8126">in</wd>

<space/>

<wd l="6370" t="7973" r="6634" b="8131">the</wd>

<space/>

<wd l="6691" t="7973" r="7493" b="8174">proposed</wd>

<space/>

<wd l="7546" t="7973" r="8035" b="8131">work.</wd>

</ln>

</para>

<para l="6341" t="8544" r="10512" b="10872" alignment="justified" li="504" ri="792" spaceBefore="302" fli="-288" lsp="exactly" lspExact="271" language="en">

<bullet type="ordered" numChars="3">

</bullet>

<ln l="6341" t="8544" r="10512" b="8746" baseLine="8698">

<run bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4"><wd l="6341" t="8549" r="6581" b="8698">1.</wd>

<space/>

<wd l="6581" t="8549" r="7445" b="8746">Frequent</wd>

<space/>

<wd l="7498" t="8544" r="8731" b="8702">abbreviation:</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4"><wd l="8813" t="8544" r="9192" b="8702">This</wd>

<space/>

<wd l="9254" t="8544" r="9394" b="8702">is</wd>

<space/>

<wd l="9456" t="8544" r="9720" b="8702">the</wd>

<space/>

<wd l="9782" t="8544" r="10118" b="8702">first</wd>

<space/>

<wd l="10171" t="8544" r="10512" b="8702">rule</wd>

<space/>

</run>

</ln>

<ln l="6581" t="8818" r="10512" b="9019" baseLine="8966" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6581" t="8818" r="6912" b="8976">that</wd>

<space/>

<wd l="6994" t="8870" r="7248" b="8976">we</wd>

<space/>

<wd l="7344" t="8818" r="7819" b="9019">apply</wd>

<space/>

<wd l="7915" t="8870" r="8126" b="8976">on</wd>

<space/>

<wd l="8213" t="8818" r="8477" b="8976">the</wd>

<space/>

<wd l="8568" t="8818" r="9034" b="9019">noisy</wd>

<space/>

<wd l="9125" t="8842" r="9494" b="8976">text.</wd>

<space/>

<wd l="9667" t="8822" r="9955" b="8976">We</wd>

<space/>

<wd l="10042" t="8818" r="10512" b="8976">make</wd>

<space/>

</ln>

<ln l="6581" t="9086" r="10512" b="9288" baseLine="9240" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6581" t="9139" r="6874" b="9245">use</wd>

<space/>

<wd l="6965" t="9086" r="7157" b="9245">of</wd>

<space/>

<wd l="7234" t="9139" r="7330" b="9245">a</wd>

<space/>

<wd l="7416" t="9086" r="7685" b="9245">list</wd>

<space/>

<wd l="7776" t="9086" r="7963" b="9245">of</wd>

<space/>

<wd l="8040" t="9086" r="8770" b="9288">frequent</wd>

<space/>

<wd l="8856" t="9086" r="10018" b="9245">abbreviations</wd>

<space/>

<wd l="10109" t="9086" r="10512" b="9245">used</wd>

<space/>

</ln>

<ln l="6581" t="9360" r="10502" b="9518" baseLine="9509" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6581" t="9360" r="6754" b="9514">in</wd>

<space/>

<wd l="6826" t="9360" r="7397" b="9518">twitter</wd>

<space/>

<wd l="7469" t="9360" r="7781" b="9518">and</wd>

<space/>

<wd l="7853" t="9360" r="8050" b="9518">its</wd>

<space/>

<wd l="8131" t="9360" r="8746" b="9518">normal</wd>

<space/>

<wd l="8822" t="9360" r="9288" b="9518">form.</wd>

<space/>

<wd l="9422" t="9360" r="9763" b="9518">The</wd>

<space/>

<wd l="9835" t="9360" r="10104" b="9518">list</wd>

<space/>

<wd l="10176" t="9413" r="10502" b="9518">was</wd>

<space/>

</ln>

<ln l="6586" t="9595" r="10498" b="9830" baseLine="9774">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4"><wd l="6586" t="9629" r="7397" b="9830">compiled</wd>

<space/>

<wd l="7450" t="9629" r="7877" b="9787">from</wd>

<space/>

<wd l="7930" t="9629" r="8194" b="9787">the</wd>

<space/>

<wd l="8251" t="9629" r="8640" b="9787">Web</wd>

<space/>

</run>

<wd l="8702" t="9595" r="8918" b="9739"><run underlined="none" subsuperscript="superscript" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">6</run>

<run underlined="none" subsuperscript="superscript" fontSize="1100" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-4">,</run>

<run underlined="none" subsuperscript="superscript" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">7</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4"><space/>

<wd l="8995" t="9629" r="9307" b="9787">and</wd>

<space/>

<wd l="9360" t="9629" r="10037" b="9830">training</wd>

<space/>

<wd l="10099" t="9629" r="10498" b="9787">data.</wd>

<space/>

</run>

</ln>

<ln l="6581" t="9902" r="10512" b="10104" baseLine="10051" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6581" t="9902" r="6739" b="10056">If</wd>

<space/>

<wd l="6773" t="9902" r="7042" b="10061">the</wd>

<space/>

<wd l="7090" t="9902" r="7574" b="10061">token</wd>

<space/>

<wd l="7618" t="9902" r="8443" b="10061">identified</wd>

<space/>

<wd l="8496" t="9955" r="8664" b="10061">as</wd>

<space/>

<wd l="8726" t="9955" r="8822" b="10061">a</wd>

<space/>

<wd l="8866" t="9902" r="9624" b="10104">potential</wd>

<space/>

<wd l="9682" t="9902" r="10512" b="10061">candidate</wd>

<space/>

</ln>

<ln l="6581" t="10171" r="10507" b="10373" baseLine="10325" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6581" t="10171" r="6754" b="10325">in</wd>

<space/>

<wd l="6797" t="10171" r="7066" b="10330">the</wd>

<space/>

<wd l="7118" t="10171" r="7454" b="10330">first</wd>

<space/>

<wd l="7512" t="10195" r="7848" b="10373">step</wd>

<space/>

<wd l="7901" t="10171" r="8035" b="10330">is</wd>

<space/>

<wd l="8093" t="10195" r="8722" b="10373">present</wd>

<space/>

<wd l="8770" t="10171" r="8942" b="10325">in</wd>

<space/>

<wd l="8986" t="10171" r="9254" b="10330">the</wd>

<space/>

<wd l="9302" t="10171" r="9571" b="10330">list</wd>

<space/>

<wd l="9614" t="10224" r="9869" b="10330">we</wd>

<space/>

<wd l="9926" t="10171" r="10507" b="10373">simply</wd>

<space/>

</ln>

<ln l="6581" t="10445" r="10512" b="10646" baseLine="10594" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6581" t="10445" r="7210" b="10646">replace</wd>

<space/>

<wd l="7262" t="10445" r="7387" b="10603">it</wd>

<space/>

<wd l="7440" t="10445" r="7829" b="10603">with</wd>

<space/>

<wd l="7877" t="10445" r="8146" b="10603">the</wd>

<space/>

<wd l="8198" t="10445" r="8813" b="10603">normal</wd>

<space/>

<wd l="8870" t="10469" r="9235" b="10632">text,</wd>

<space/>

<wd l="9307" t="10445" r="10190" b="10632">otherwise,</wd>

<space/>

<wd l="10258" t="10498" r="10512" b="10603">we</wd>

<space/>

</ln>

<ln l="6581" t="10714" r="8688" b="10872" baseLine="10867" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6581" t="10766" r="7061" b="10872">move</wd>

<space/>

<wd l="7118" t="10738" r="7498" b="10872">onto</wd>

<space/>

<wd l="7555" t="10714" r="7824" b="10872">the</wd>

<space/>

<wd l="7877" t="10738" r="8251" b="10872">next</wd>

<space/>

<wd l="8304" t="10714" r="8688" b="10872">rule.</wd>

</ln>

</para>

<para l="6322" t="11256" r="10517" b="12538" alignment="justified" li="504" ri="792" spaceBefore="268" fli="-288" lsp="exactly" lspExact="271" language="en">

<bullet type="ordered" numChars="3">

</bullet>

<ln l="6322" t="11256" r="10502" b="11458" baseLine="11405">

<run bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6322" t="11261" r="6581" b="11405">2.</wd>

<space/>

<wd l="6581" t="11261" r="7286" b="11414">Present</wd>

<space/>

<wd l="7354" t="11256" r="8251" b="11458">participle</wd>

<space/>

<wd l="8333" t="11256" r="8520" b="11414">of</wd>

<space/>

<wd l="8587" t="11304" r="8693" b="11414">a</wd>

<space/>

<wd l="8765" t="11261" r="9245" b="11414">verb:</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="9365" t="11261" r="9523" b="11410">A</wd>

<space/>

<wd l="9600" t="11256" r="9936" b="11414">rule</wd>

<space/>

<wd l="10013" t="11256" r="10152" b="11414">is</wd>

<space/>

<wd l="10238" t="11256" r="10502" b="11414">de-</wd>

</run>

</ln>

<ln l="6586" t="11525" r="10507" b="11726" baseLine="11678" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6586" t="11525" r="7018" b="11683">fined</wd>

<space/>

<wd l="7080" t="11525" r="7339" b="11683">for</wd>

<space/>

<wd l="7402" t="11578" r="7498" b="11683">a</wd>

<space/>

<wd l="7555" t="11525" r="8491" b="11726">misspelled</wd>

<space/>

<wd l="8549" t="11549" r="9182" b="11726">present</wd>

<space/>

<wd l="9240" t="11525" r="10066" b="11726">participle</wd>

<space/>

<wd l="10128" t="11525" r="10507" b="11683">verb</wd>

<space/>

</ln>

<ln l="6586" t="11798" r="10512" b="12000" baseLine="11947" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6586" t="11851" r="6754" b="11957">as</wd>

<space/>

<wd l="6835" t="11798" r="7670" b="11957">discussed</wd>

<space/>

<wd l="7738" t="11798" r="7906" b="11952">in</wd>

<space/>

<wd l="7982" t="11798" r="8592" b="11957">section</wd>

<space/>

<wd l="8664" t="11803" r="8976" b="11957">3.1.</wd>

<space/>

<wd l="9096" t="11803" r="9379" b="11957">We</wd>

<space/>

<wd l="9451" t="11798" r="10123" b="12000">identify</wd>

<space/>

<wd l="10200" t="11798" r="10512" b="11957">and</wd>

<space/>

</ln>

<ln l="6586" t="12067" r="10517" b="12269" baseLine="12216">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6586" t="12120" r="7022" b="12226">cross</wd>

<space/>

<wd l="7104" t="12067" r="7608" b="12226">check</wd>

<space/>

<wd l="7670" t="12067" r="7872" b="12226">its</wd>

<space/>

<wd l="7944" t="12072" r="8285" b="12226">PoS</wd>

<space/>

<wd l="8366" t="12091" r="8626" b="12269">tag</wd>

<space/>

<wd l="8707" t="12067" r="9024" b="12264">(i.e.</wd>

<space/>

</run>

<wd l="9154" t="12072" r="9739" b="12264"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">VERB</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">)</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="9816" t="12067" r="9984" b="12221">in</wd>

<space/>

<wd l="10056" t="12067" r="10517" b="12226">order</wd>

<space/>

</run>

</ln>

<ln l="6581" t="12336" r="9686" b="12538" baseLine="12490" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6581" t="12360" r="6749" b="12494">to</wd>

<space/>

<wd l="6806" t="12336" r="7464" b="12494">retrieve</wd>

<space/>

<wd l="7517" t="12336" r="7718" b="12494">its</wd>

<space/>

<wd l="7781" t="12336" r="8213" b="12494">valid</wd>

<space/>

<wd l="8270" t="12336" r="9163" b="12538">equivalent</wd>

<space/>

<wd l="9216" t="12336" r="9686" b="12494">form.</wd>

</ln>

</para>

<para l="6322" t="12878" r="10512" b="14419" alignment="justified" li="504" ri="792" spaceBefore="270" spaceAfter="172" fli="-288" lsp="exactly" lspExact="271" language="en">

<bullet type="ordered" numChars="3">

</bullet>

<ln l="6322" t="12878" r="10502" b="13080" baseLine="13027">

<run bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6322" t="12883" r="6581" b="13027">3.</wd>

<space/>

<wd l="6581" t="12878" r="7306" b="13080">Missing</wd>

<space/>

<wd l="7411" t="12878" r="8722" b="13080">apostrophe(’):</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="8885" t="12878" r="9514" b="13037">Twitter</wd>

<space/>

<wd l="9605" t="12931" r="10046" b="13037">users</wd>

<space/>

<wd l="10152" t="12931" r="10502" b="13037">nor-</wd>

</run>

</ln>

<ln l="6581" t="13147" r="10512" b="13349" baseLine="13301" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6581" t="13147" r="7075" b="13349">mally</wd>

<space/>

<wd l="7157" t="13147" r="7627" b="13349">drops</wd>

<space/>

<wd l="7709" t="13147" r="8664" b="13349">apostrophe</wd>

<space/>

<wd l="8736" t="13147" r="9182" b="13306">mark</wd>

<space/>

<wd l="9254" t="13147" r="9427" b="13301">in</wd>

<space/>

<wd l="9494" t="13171" r="10094" b="13306">tweets.</wd>

<space/>

<wd l="10224" t="13152" r="10512" b="13306">We</wd>

<space/>

</ln>

<ln l="6586" t="13421" r="10502" b="13622" baseLine="13570" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6586" t="13421" r="7114" b="13579">define</wd>

<space/>

<wd l="7181" t="13474" r="7277" b="13579">a</wd>

<space/>

<wd l="7339" t="13421" r="7675" b="13579">rule</wd>

<space/>

<wd l="7738" t="13445" r="7906" b="13579">to</wd>

<space/>

<wd l="7973" t="13421" r="8645" b="13622">identify</wd>

<space/>

<wd l="8717" t="13421" r="9029" b="13579">and</wd>

<space/>

<wd l="9091" t="13421" r="9576" b="13579">insert</wd>

<space/>

<wd l="9643" t="13474" r="9739" b="13579">a</wd>

<space/>

<wd l="9802" t="13445" r="10502" b="13622">apostro-</wd>

</ln>

<ln l="6581" t="13690" r="10502" b="13891" baseLine="13843" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6581" t="13690" r="6898" b="13891">phe</wd>

<space/>

<wd l="6960" t="13690" r="7406" b="13848">mark</wd>

<space/>

<wd l="7474" t="13714" r="7627" b="13848">at</wd>

<space/>

<wd l="7690" t="13742" r="8261" b="13891">proper</wd>

<space/>

<wd l="8318" t="13690" r="8822" b="13891">place.</wd>

<space/>

<wd l="8923" t="13690" r="9302" b="13848">This</wd>

<space/>

<wd l="9374" t="13690" r="9710" b="13848">rule</wd>

<space/>

<wd l="9773" t="13742" r="10104" b="13848">was</wd>

<space/>

<wd l="10176" t="13742" r="10502" b="13848">em-</wd>

</ln>

<ln l="6581" t="13963" r="10498" b="14165" baseLine="14112">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6581" t="13963" r="7176" b="14165">ployed</wd>

<space/>

<wd l="7253" t="13963" r="7512" b="14122">for</wd>

<space/>

<wd l="7589" t="13963" r="8352" b="14165">handling</wd>

<space/>

<wd l="8434" t="13963" r="9274" b="14165">following</wd>

<space/>

<wd l="9360" t="13963" r="10090" b="14122">variants:</wd>

<space/>

</run>

<wd l="10253" t="13968" r="10498" b="14150"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">’m</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

</ln>

<ln l="6610" t="14232" r="8453" b="14419" baseLine="14386">

<wd l="6610" t="14232" r="6821" b="14419"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">’ll</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

<wd l="6912" t="14237" r="7190" b="14419"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">’ve</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

<wd l="7286" t="14237" r="7541" b="14419"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">’re</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

<wd l="7608" t="14237" r="7886" b="14419"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">n’t</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="7982" t="14237" r="8098" b="14390">’s</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="8160" t="14256" r="8453" b="14390">etc.</wd>

</run>

</ln>

</para>

<rulerline l="6144" t="14630" r="7344" b="14630" type="single" width="10" color="000000"/>

<para l="6144" t="14683" r="11338" b="15302" alignment="left" li="72" spaceBefore="61" spaceAfter="9" fli="144" lsp="exactly" lspExact="209" language="en">

<ln l="6398" t="14683" r="11338" b="14890" baseLine="14840">

<wd l="6398" t="14683" r="8899" b="14890"><run underlined="none" subsuperscript="superscript" fontSize="650" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="4">6</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="4">http://www.webopedia.com/quick</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="4"><space/>

<wd l="8962" t="14722" r="9725" b="14851">ref/Twitter</wd>

<space/>

<wd l="9787" t="14722" r="10546" b="14890">Dictionary</wd>

<space/>

<wd l="10627" t="14722" r="11338" b="14890">Guide.asp</wd>

<space/>

</run>

</ln>

<ln l="6398" t="14904" r="10003" b="15106" baseLine="15056">

<wd l="6398" t="14904" r="10003" b="15106"><run underlined="none" subsuperscript="superscript" fontSize="650" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="4">7</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="4">http://marketing.wtwhmedia.com/30-must-know-</run>

</wd>

</ln>

<ln l="6144" t="15134" r="8755" b="15302" baseLine="15259" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="4">

<wd l="6144" t="15134" r="8755" b="15302">twitter-abbreviations-and-acronyms/</wd>

</ln>

</para>

</column>

</section>

<dd l="1440" t="15746" r="11342" b="15975">

<para l="5771" t="15792" r="6196" b="15946" alignment="left" li="4320" lsp="exactly" lspExact="223" language="en">

<ln l="5837" t="15792" r="6130" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="950" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="28">

<wd l="5837" t="15792" r="6130" b="15946">108</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4316.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1431" marginTop="1248" marginRight="1301" marginBottom="1292" offsetX="-22" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1512" t="1248" r="10608" b="2347">

<column l="1512" t="1248" r="5899" b="2347">

<para l="1613" t="1320" r="5794" b="2338" alignment="justified" li="360" ri="72" spaceBefore="11" fli="-288" lsp="exactly" lspExact="268" language="en">

<ln l="1613" t="1320" r="5794" b="1522" baseLine="1474">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1613" t="1325" r="1766" b="1478">4.</wd>

<space/>

</run>

<run bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1877" t="1325" r="2822" b="1522">Elongated</wd>

<space/>

<wd l="2923" t="1320" r="3437" b="1478">form:</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="3610" t="1320" r="4128" b="1522">Noisy</wd>

<space/>

<wd l="4234" t="1320" r="4680" b="1478">word</wd>

<space/>

<wd l="4781" t="1320" r="4949" b="1474">in</wd>

<space/>

<wd l="5050" t="1320" r="5246" b="1478">its</wd>

<space/>

<wd l="5362" t="1320" r="5794" b="1478">elon-</wd>

</run>

</ln>

<ln l="1882" t="1594" r="5794" b="1795" baseLine="1742">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1882" t="1594" r="2347" b="1795">gated</wd>

<space/>

<wd l="2419" t="1594" r="2846" b="1752">form</wd>

<space/>

<wd l="2923" t="1594" r="3240" b="1790">(i.e.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="3360" t="1646" r="4200" b="1795">yeeeeesss</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="4272" t="1594" r="4531" b="1752">for</wd>

<space/>

</run>

<wd l="4589" t="1598" r="4939" b="1795"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">yes</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">)</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="5026" t="1646" r="5285" b="1752">are</wd>

<space/>

<wd l="5357" t="1594" r="5794" b="1752">iden-</wd>

</run>

</ln>

<ln l="1877" t="1862" r="5794" b="2064" baseLine="2016" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1877" t="1862" r="2328" b="2021">tified</wd>

<space/>

<wd l="2390" t="1862" r="2702" b="2021">and</wd>

<space/>

<wd l="2760" t="1862" r="3610" b="2021">translated</wd>

<space/>

<wd l="3667" t="1862" r="4003" b="2021">into</wd>

<space/>

<wd l="4070" t="1862" r="4502" b="2021">valid</wd>

<space/>

<wd l="4560" t="1862" r="5006" b="2021">word</wd>

<space/>

<wd l="5069" t="1862" r="5280" b="2064">by</wd>

<space/>

<wd l="5347" t="1862" r="5794" b="2021">itera-</wd>

</ln>

<ln l="1877" t="2136" r="5558" b="2338" baseLine="2285" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1877" t="2136" r="2362" b="2338">tively</wd>

<space/>

<wd l="2429" t="2136" r="3192" b="2338">stripping</wd>

<space/>

<wd l="3254" t="2136" r="3514" b="2294">off</wd>

<space/>

<wd l="3557" t="2136" r="4574" b="2294">consecutive</wd>

<space/>

<wd l="4632" t="2136" r="5558" b="2294">characters.</wd>

</ln>

</para>

</column>

<column l="6221" t="1248" r="10608" b="2151">

<table l="6329" t="1258" r="10320" b="2141" alignment="left" li="108" ri="288" spaceBefore="10" spaceAfter="10">

<topBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<gridTable>

<gridCol>958</gridCol>

<gridCol>1041</gridCol>

<gridCol>1061</gridCol>

<gridCol>931</gridCol>

<gridRow>302</gridRow>

<gridRow>298</gridRow>

<gridRow>283</gridRow>

</gridTable>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="6466" t="1306" r="7171" b="1459" alignment="centered" spaceAfter="46" lsp="exactly" lspExact="251" language="en">

<ln l="6466" t="1306" r="7171" b="1459" baseLine="1450" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6466" t="1306" r="7171" b="1459">Dataset</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="7406" t="1301" r="8203" b="1459" alignment="centered" spaceAfter="46" lsp="exactly" lspExact="251" language="en">

<ln l="7406" t="1301" r="8203" b="1459" baseLine="1450" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="7406" t="1301" r="7517" b="1454">#</wd>

<space/>

<wd l="7574" t="1306" r="8203" b="1459">Tweets</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="8448" t="1301" r="9264" b="1459" alignment="centered" spaceAfter="46" lsp="exactly" lspExact="251" language="en">

<ln l="8448" t="1301" r="9264" b="1459" baseLine="1450" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="8448" t="1301" r="8558" b="1454">#</wd>

<space/>

<wd l="8616" t="1306" r="9264" b="1459">Tokens</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="9509" t="1301" r="10190" b="1502" alignment="centered" spaceAfter="46" lsp="exactly" lspExact="251" language="en">

<ln l="9509" t="1301" r="10190" b="1502" baseLine="1450" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="9509" t="1301" r="9619" b="1454">#</wd>

<space/>

<wd l="9672" t="1301" r="10190" b="1502">Noisy</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="6566" t="1622" r="7061" b="1776" alignment="centered" spaceAfter="41" lsp="exactly" lspExact="255" language="en">

<ln l="6566" t="1622" r="7061" b="1776" baseLine="1771" italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="9">

<wd l="6566" t="1622" r="7061" b="1776">train</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="7594" t="1618" r="8021" b="1776" alignment="centered" spaceAfter="28" lsp="exactly" lspExact="269" language="en">

<ln l="7594" t="1618" r="8021" b="1776" baseLine="1771" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="7594" t="1618" r="8021" b="1776">2950</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="8587" t="1618" r="9115" b="1776" alignment="centered" spaceAfter="28" lsp="exactly" lspExact="269" language="en">

<ln l="8587" t="1618" r="9115" b="1776" baseLine="1771" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="8587" t="1618" r="9115" b="1776">44385</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="9634" t="1618" r="10066" b="1776" alignment="centered" spaceAfter="28" lsp="exactly" lspExact="269" language="en">

<ln l="9634" t="1618" r="10066" b="1776" baseLine="1771" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="9634" t="1622" r="10066" b="1776">3942</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="6638" t="1906" r="6989" b="2059" alignment="centered" spaceAfter="36" lsp="exactly" lspExact="237" language="en">

<ln l="6638" t="1906" r="6989" b="2059" baseLine="2050" italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="-1">

<wd l="6638" t="1915" r="6989" b="2059">test</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="7613" t="1901" r="8016" b="2059" alignment="centered" spaceAfter="23" lsp="exactly" lspExact="250" language="en">

<ln l="7613" t="1901" r="8016" b="2059" baseLine="2050" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-7">

<wd l="7613" t="1901" r="8016" b="2059">1967</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="8592" t="1901" r="9106" b="2059" alignment="centered" spaceAfter="23" lsp="exactly" lspExact="250" language="en">

<ln l="8592" t="1901" r="9106" b="2059" baseLine="2050" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-6">

<wd l="8592" t="1906" r="9106" b="2059">29421</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="9634" t="1901" r="10066" b="2059" alignment="centered" spaceAfter="23" lsp="exactly" lspExact="250" language="en">

<ln l="9634" t="1901" r="10066" b="2059" baseLine="2050" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="9634" t="1901" r="10066" b="2059">2776</wd>

</ln>

</para>

</cell>

</table>

</column>

</section>

<section l="6221" t="2347" r="10527" b="2588">

<column l="6221" t="2347" r="10527" b="2588">

<para l="6912" t="2376" r="9744" b="2534" alignment="centered" lsp="exactly" lspExact="230" language="en">

<ln l="6912" t="2376" r="9744" b="2534" baseLine="2530" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6912" t="2376" r="7392" b="2534">Table</wd>

<space/>

<wd l="7469" t="2381" r="7598" b="2534">1:</wd>

<space/>

<wd l="7742" t="2376" r="8520" b="2534">Statistics</wd>

<space/>

<wd l="8587" t="2376" r="8774" b="2534">of</wd>

<space/>

<wd l="8818" t="2376" r="9082" b="2534">the</wd>

<space/>

<wd l="9144" t="2376" r="9744" b="2534">dataset</wd>

</ln>

</para>

</column>

</section>

<section l="1431" t="2588" r="10527" b="15331">

<column l="1431" t="2588" r="5818" b="15331">

<para l="1618" t="2602" r="5808" b="6595" alignment="justified" li="504" fli="-288" lsp="exactly" lspExact="267" language="en">

<bullet type="ordered" numChars="3">

</bullet>

<ln l="1618" t="2602" r="5808" b="2803" baseLine="2750">

<run bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="1618" t="2602" r="1882" b="2750">5.</wd>

<space/>

<wd l="1882" t="2602" r="2318" b="2803">Split</wd>

<space/>

<wd l="2386" t="2616" r="2717" b="2760">two</wd>

<space/>

<wd l="2794" t="2606" r="3494" b="2803">merged</wd>

<space/>

<wd l="3571" t="2606" r="4190" b="2760">words:</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="4306" t="2602" r="4685" b="2760">This</wd>

<space/>

<wd l="4766" t="2602" r="5102" b="2760">rule</wd>

<space/>

<wd l="5184" t="2602" r="5630" b="2803">splits</wd>

<space/>

<wd l="5712" t="2654" r="5808" b="2760">a</wd>

<space/>

</run>

</ln>

<ln l="1877" t="2870" r="5803" b="3072" baseLine="3024" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1877" t="2870" r="2347" b="3072">noisy</wd>

<space/>

<wd l="2438" t="2870" r="2928" b="3058">word,</wd>

<space/>

<wd l="3043" t="2870" r="3187" b="3024">if</wd>

<space/>

<wd l="3269" t="2870" r="3389" b="3029">it</wd>

<space/>

<wd l="3480" t="2870" r="3614" b="3029">is</wd>

<space/>

<wd l="3720" t="2923" r="3816" b="3029">a</wd>

<space/>

<wd l="3907" t="2870" r="5117" b="3029">concatenation</wd>

<space/>

<wd l="5213" t="2870" r="5405" b="3029">of</wd>

<space/>

<wd l="5482" t="2894" r="5803" b="3029">two</wd>

<space/>

</ln>

<ln l="1877" t="3144" r="5794" b="3346" baseLine="3293" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1877" t="3144" r="2309" b="3302">valid</wd>

<space/>

<wd l="2366" t="3144" r="2942" b="3302">words.</wd>

<space/>

<wd l="3038" t="3149" r="3346" b="3302">For</wd>

<space/>

<wd l="3403" t="3144" r="4138" b="3346">example</wd>

<space/>

<wd l="4219" t="3144" r="5126" b="3346">‘thankyou’</wd>

<space/>

<wd l="5213" t="3144" r="5347" b="3302">is</wd>

<space/>

<wd l="5424" t="3197" r="5794" b="3302">con-</wd>

</ln>

<ln l="1882" t="3413" r="5779" b="3614" baseLine="3562" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1882" t="3413" r="2774" b="3571">catenation</wd>

<space/>

<wd l="2837" t="3413" r="3029" b="3571">of</wd>

<space/>

<wd l="3077" t="3437" r="3398" b="3571">two</wd>

<space/>

<wd l="3470" t="3437" r="4171" b="3614">separate</wd>

<space/>

<wd l="4234" t="3413" r="4757" b="3571">words</wd>

<space/>

<wd l="4824" t="3413" r="5078" b="3571">i.e.</wd>

<space/>

<wd l="5198" t="3413" r="5779" b="3571">‘thank’</wd>

<space/>

</ln>

<ln l="1882" t="3686" r="5808" b="3888" baseLine="3835" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1882" t="3686" r="2194" b="3845">and</wd>

<space/>

<wd l="2309" t="3691" r="2803" b="3888">‘you’.</wd>

<space/>

<wd l="2995" t="3691" r="3283" b="3845">We</wd>

<space/>

<wd l="3379" t="3686" r="3715" b="3845">find</wd>

<space/>

<wd l="3816" t="3710" r="4090" b="3845">out</wd>

<space/>

<wd l="4181" t="3686" r="4627" b="3845">word</wd>

<space/>

<wd l="4723" t="3686" r="5064" b="3888">pair</wd>

<space/>

<wd l="5160" t="3710" r="5314" b="3845">at</wd>

<space/>

<wd l="5410" t="3686" r="5808" b="3845">each</wd>

<space/>

</ln>

<ln l="1886" t="3955" r="5808" b="4157" baseLine="4104" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1886" t="3955" r="2256" b="4157">split</wd>

<space/>

<wd l="2333" t="3955" r="2784" b="4157">point</wd>

<space/>

<wd l="2866" t="3955" r="3178" b="4114">and</wd>

<space/>

<wd l="3259" t="3955" r="3898" b="4157">applied</wd>

<space/>

<wd l="3974" t="3955" r="4282" b="4114">this</wd>

<space/>

<wd l="4368" t="3955" r="4709" b="4114">rule</wd>

<space/>

<wd l="4786" t="3955" r="5045" b="4114">for</wd>

<space/>

<wd l="5122" t="3955" r="5386" b="4114">the</wd>

<space/>

<wd l="5467" t="3955" r="5808" b="4157">pair</wd>

<space/>

</ln>

<ln l="1877" t="4224" r="5779" b="4426" baseLine="4378" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1877" t="4224" r="2203" b="4382">that</wd>

<space/>

<wd l="2280" t="4224" r="2563" b="4382">has</wd>

<space/>

<wd l="2645" t="4224" r="3034" b="4382">both</wd>

<space/>

<wd l="3106" t="4224" r="3538" b="4382">valid</wd>

<space/>

<wd l="3610" t="4224" r="4099" b="4382">word.</wd>

<space/>

<wd l="4238" t="4224" r="4776" b="4382">Token</wd>

<space/>

<wd l="4877" t="4224" r="5779" b="4426">‘thankyou’</wd>

<space/>

</ln>

<ln l="1877" t="4498" r="5794" b="4699" baseLine="4646" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1877" t="4498" r="2160" b="4656">has</wd>

<space/>

<wd l="2232" t="4498" r="3072" b="4699">following</wd>

<space/>

<wd l="3144" t="4498" r="3590" b="4656">word</wd>

<space/>

<wd l="3658" t="4498" r="3998" b="4699">pair</wd>

<space/>

<wd l="4061" t="4498" r="4320" b="4656">for</wd>

<space/>

<wd l="4382" t="4502" r="4483" b="4656">7</wd>

<space/>

<wd l="4565" t="4498" r="4872" b="4699">spit</wd>

<space/>

<wd l="4939" t="4498" r="5429" b="4699">point:</wd>

<space/>

<wd l="5539" t="4498" r="5794" b="4656">i.e.</wd>

<space/>

</ln>

<ln l="1886" t="4766" r="5794" b="4968" baseLine="4920" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1886" t="4771" r="2102" b="4963">(1:</wd>

<space/>

<wd l="2218" t="4771" r="2443" b="4954">‘t’,</wd>

<space/>

<wd l="2534" t="4766" r="3451" b="4968">‘hankyou’;</wd>

<space/>

<wd l="3528" t="4771" r="3677" b="4925">2:</wd>

<space/>

<wd l="3787" t="4766" r="4123" b="4954">‘th’,</wd>

<space/>

<wd l="4219" t="4766" r="5026" b="4968">‘ankyou’;</wd>

<space/>

<wd l="5102" t="4771" r="5246" b="4925">3:</wd>

<space/>

<wd l="5362" t="4766" r="5794" b="4954">‘tha’,</wd>

<space/>

</ln>

<ln l="1901" t="5040" r="5794" b="5242" baseLine="5189" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1901" t="5040" r="2611" b="5242">‘nkyou’;</wd>

<space/>

<wd l="2698" t="5045" r="2851" b="5198">4:</wd>

<space/>

<wd l="2986" t="5040" r="3528" b="5227">‘than’,</wd>

<space/>

<wd l="3634" t="5040" r="4238" b="5242">‘kyou’;</wd>

<space/>

<wd l="4330" t="5040" r="4478" b="5198">5:</wd>

<space/>

<wd l="4613" t="5040" r="5189" b="5227">‘thank,</wd>

<space/>

<wd l="5294" t="5045" r="5794" b="5242">‘you’;</wd>

<space/>

</ln>

<ln l="1882" t="5309" r="5794" b="5510" baseLine="5462" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1882" t="5309" r="2030" b="5467">6:</wd>

<space/>

<wd l="2218" t="5309" r="2976" b="5510">‘thanky’,</wd>

<space/>

<wd l="3115" t="5314" r="3504" b="5496">‘ou’;</wd>

<space/>

<wd l="3634" t="5309" r="3946" b="5467">and</wd>

<space/>

<wd l="4042" t="5314" r="4190" b="5467">7:</wd>

<space/>

<wd l="4382" t="5309" r="5246" b="5510">‘thankyo’,</wd>

<space/>

<wd l="5386" t="5314" r="5794" b="5506">‘u’;).</wd>

<space/>

</ln>

<ln l="1877" t="5582" r="5798" b="5784" baseLine="5731" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1877" t="5582" r="2357" b="5741">Word</wd>

<space/>

<wd l="2434" t="5582" r="2779" b="5784">pair</wd>

<space/>

<wd l="2866" t="5582" r="3528" b="5779">(‘thank,</wd>

<space/>

<wd l="3648" t="5587" r="4162" b="5784">‘you’)</wd>

<space/>

<wd l="4253" t="5606" r="4411" b="5741">at</wd>

<space/>

<wd l="4498" t="5582" r="4867" b="5784">split</wd>

<space/>

<wd l="4944" t="5582" r="5395" b="5784">point</wd>

<space/>

<wd l="5477" t="5582" r="5568" b="5741">5</wd>

<space/>

<wd l="5659" t="5582" r="5798" b="5741">is</wd>

<space/>

</ln>

<ln l="1882" t="5851" r="5794" b="6053" baseLine="6005" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1882" t="5851" r="2482" b="6010">chosen</wd>

<space/>

<wd l="2549" t="5851" r="2808" b="6010">for</wd>

<space/>

<wd l="2875" t="5851" r="3139" b="6010">the</wd>

<space/>

<wd l="3211" t="5851" r="4464" b="6010">normalization.</wd>

<space/>

<wd l="4584" t="5851" r="5179" b="6010">Before</wd>

<space/>

<wd l="5251" t="5851" r="5794" b="6053">apply-</wd>

</ln>

<ln l="1877" t="6125" r="5798" b="6326" baseLine="6274" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1877" t="6125" r="2150" b="6326">ing</wd>

<space/>

<wd l="2218" t="6125" r="2525" b="6283">this</wd>

<space/>

<wd l="2597" t="6125" r="2933" b="6283">rule</wd>

<space/>

<wd l="3005" t="6178" r="3101" b="6283">a</wd>

<space/>

<wd l="3158" t="6125" r="3970" b="6283">threshold</wd>

<space/>

<wd l="4032" t="6125" r="4291" b="6283">for</wd>

<space/>

<wd l="4349" t="6125" r="4800" b="6283">word</wd>

<space/>

<wd l="4858" t="6125" r="5405" b="6326">length</wd>

<space/>

<wd l="5467" t="6178" r="5798" b="6283">was</wd>

<space/>

</ln>

<ln l="1877" t="6394" r="4622" b="6595" baseLine="6542" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1877" t="6394" r="2952" b="6595">heuristically</wd>

<space/>

<wd l="3019" t="6418" r="3254" b="6552">set</wd>

<space/>

<wd l="3307" t="6418" r="3475" b="6552">to</wd>

<space/>

<wd l="3538" t="6394" r="3638" b="6552">6</wd>

<space/>

<wd l="3701" t="6394" r="4622" b="6552">characters.</wd>

</ln>

</para>

<para l="1618" t="6859" r="5808" b="9499" alignment="justified" li="504" spaceBefore="194" fli="-288" lsp="exactly" lspExact="271" language="en">

<bullet type="ordered" numChars="3">

</bullet>

<ln l="1618" t="6859" r="5803" b="7018" baseLine="7008">

<run bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5"><wd l="1618" t="6859" r="1877" b="7008">6.</wd>

<space/>

<wd l="1877" t="6859" r="2520" b="7018">British</wd>

<space/>

<wd l="2611" t="6874" r="2789" b="7018">to</wd>

<space/>

<wd l="2885" t="6859" r="3802" b="7018">American</wd>

<space/>

<wd l="3898" t="6864" r="4790" b="7018">standard:</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5"><wd l="4944" t="6859" r="5803" b="7018">American</wd>

<space/>

</run>

</ln>

<ln l="1886" t="7128" r="5808" b="7330" baseLine="7282" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="1886" t="7128" r="2616" b="7286">standard</wd>

<space/>

<wd l="2678" t="7181" r="3010" b="7286">was</wd>

<space/>

<wd l="3082" t="7128" r="3883" b="7330">preferred</wd>

<space/>

<wd l="3950" t="7181" r="4118" b="7286">as</wd>

<space/>

<wd l="4195" t="7181" r="4392" b="7286">an</wd>

<space/>

<wd l="4469" t="7128" r="5074" b="7286">official</wd>

<space/>

<wd l="5141" t="7128" r="5808" b="7330">English</wd>

<space/>

</ln>

<ln l="1877" t="7402" r="5794" b="7603" baseLine="7550" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="1877" t="7402" r="2664" b="7603">language</wd>

<space/>

<wd l="2722" t="7402" r="3451" b="7560">standard</wd>

<space/>

<wd l="3494" t="7402" r="3754" b="7560">for</wd>

<space/>

<wd l="3797" t="7402" r="4061" b="7560">the</wd>

<space/>

<wd l="4118" t="7402" r="4680" b="7560">shared</wd>

<space/>

<wd l="4723" t="7402" r="5117" b="7560">task.</wd>

<space/>

<wd l="5194" t="7406" r="5482" b="7560">We</wd>

<space/>

<wd l="5530" t="7402" r="5794" b="7560">de-</wd>

</ln>

<ln l="1882" t="7670" r="5808" b="7829" baseLine="7824" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="1882" t="7670" r="2203" b="7829">fine</wd>

<space/>

<wd l="2290" t="7723" r="2386" b="7829">a</wd>

<space/>

<wd l="2467" t="7670" r="2803" b="7829">rule</wd>

<space/>

<wd l="2890" t="7670" r="3422" b="7829">which</wd>

<space/>

<wd l="3504" t="7670" r="4296" b="7829">identifies</wd>

<space/>

<wd l="4387" t="7670" r="4982" b="7829">British</wd>

<space/>

<wd l="5074" t="7670" r="5808" b="7829">standard</wd>

<space/>

</ln>

<ln l="1877" t="7944" r="5794" b="8146" baseLine="8093" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="1877" t="7944" r="2323" b="8102">word</wd>

<space/>

<wd l="2400" t="7944" r="2707" b="8102">and</wd>

<space/>

<wd l="2784" t="7968" r="3422" b="8102">convert</wd>

<space/>

<wd l="3490" t="7944" r="3614" b="8102">it</wd>

<space/>

<wd l="3682" t="7968" r="3850" b="8102">to</wd>

<space/>

<wd l="3926" t="7944" r="5165" b="8146">corresponding</wd>

<space/>

<wd l="5242" t="7949" r="5794" b="8102">Amer-</wd>

</ln>

<ln l="1877" t="8213" r="5794" b="8414" baseLine="8366" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="1877" t="8213" r="2237" b="8371">ican</wd>

<space/>

<wd l="2357" t="8213" r="3086" b="8371">standard</wd>

<space/>

<wd l="3197" t="8237" r="4229" b="8414">counterpart.</wd>

<space/>

<wd l="4459" t="8213" r="5150" b="8371">Notable</wd>

<space/>

<wd l="5261" t="8213" r="5794" b="8371">differ-</wd>

</ln>

<ln l="1882" t="8486" r="5803" b="8645" baseLine="8635" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="1882" t="8539" r="2352" b="8645">ences</wd>

<space/>

<wd l="2405" t="8486" r="3134" b="8645">between</wd>

<space/>

<wd l="3178" t="8486" r="3442" b="8645">the</wd>

<space/>

<wd l="3490" t="8510" r="3811" b="8645">two</wd>

<space/>

<wd l="3869" t="8486" r="4675" b="8645">standards</wd>

<space/>

<wd l="4728" t="8486" r="5059" b="8645">that</wd>

<space/>

<wd l="5102" t="8539" r="5357" b="8645">we</wd>

<space/>

<wd l="5400" t="8486" r="5803" b="8645">have</wd>

<space/>

</ln>

<ln l="1877" t="8755" r="5794" b="8957" baseLine="8909" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="1877" t="8755" r="2981" b="8957">incorporated</wd>

<space/>

<wd l="3024" t="8755" r="3197" b="8909">in</wd>

<space/>

<wd l="3240" t="8755" r="3504" b="8914">the</wd>

<space/>

<wd l="3547" t="8755" r="3998" b="8914">work</wd>

<space/>

<wd l="4046" t="8808" r="4306" b="8914">are</wd>

<space/>

<wd l="4373" t="8760" r="4762" b="8914">‘our’</wd>

<space/>

<wd l="4834" t="8779" r="4997" b="8914">to</wd>

<space/>

<wd l="5069" t="8760" r="5347" b="8914">‘or’</wd>

<space/>

<wd l="5429" t="8760" r="5794" b="8957">(e.g.</wd>

<space/>

</ln>

<ln l="1882" t="9029" r="5803" b="9230" baseLine="9178">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5"><wd l="1882" t="9029" r="2462" b="9187">labour</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5"><wd l="2520" t="9053" r="2688" b="9187">to</wd>

<space/>

</run>

<wd l="2750" t="9029" r="3341" b="9226"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">labor</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">),</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5"><space/>

<wd l="3432" t="9029" r="3768" b="9187">‘ise’</wd>

<space/>

<wd l="3850" t="9053" r="4018" b="9187">to</wd>

<space/>

<wd l="4099" t="9029" r="4454" b="9187">‘ize’</wd>

<space/>

<wd l="4541" t="9034" r="4910" b="9230">(e.g.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5"><wd l="5002" t="9029" r="5573" b="9187">realise</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5"><wd l="5635" t="9053" r="5803" b="9187">to</wd>

<space/>

</run>

</ln>

<ln l="1886" t="9298" r="5794" b="9499" baseLine="9446">

<wd l="1886" t="9298" r="2578" b="9494"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">realize</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">),</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5"><space/>

<wd l="2659" t="9302" r="2928" b="9456">‘re’</wd>

<space/>

<wd l="3000" t="9322" r="3163" b="9456">to</wd>

<space/>

<wd l="3240" t="9302" r="3504" b="9456">‘er’</wd>

<space/>

<wd l="3586" t="9302" r="3950" b="9499">(e.g.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5"><wd l="4032" t="9331" r="4560" b="9456">centre</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5"><wd l="4613" t="9322" r="4776" b="9456">to</wd>

<space/>

</run>

<wd l="4834" t="9302" r="5438" b="9494"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">center</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">)</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5"><space/>

<wd l="5501" t="9322" r="5794" b="9456">etc.</wd>

</run>

</ln>

</para>

<para l="1440" t="9778" r="4464" b="9998" alignment="left" spaceBefore="231" lsp="exactly" lspExact="273" language="en">

<ln l="1440" t="9778" r="4464" b="9998" baseLine="9946" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="6">

<wd l="1440" t="9778" r="1555" b="9946">4</wd>

<space/>

<wd l="1800" t="9782" r="2654" b="9950">Datasets</wd>

<space/>

<wd l="2726" t="9782" r="3106" b="9950">and</wd>

<space/>

<wd l="3168" t="9778" r="4464" b="9998">Experiments</wd>

</ln>

</para>

<para l="1440" t="10219" r="5808" b="10963" alignment="justified" spaceBefore="147" lsp="exactly" lspExact="271" language="en">

<ln l="1440" t="10219" r="5808" b="10421" baseLine="10373" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1440" t="10224" r="1622" b="10373">In</wd>

<space/>

<wd l="1709" t="10219" r="2669" b="10421">subsequent</wd>

<space/>

<wd l="2750" t="10219" r="3739" b="10378">subsections</wd>

<space/>

<wd l="3821" t="10272" r="4075" b="10378">we</wd>

<space/>

<wd l="4157" t="10219" r="4776" b="10378">discuss</wd>

<space/>

<wd l="4858" t="10219" r="5122" b="10378">the</wd>

<space/>

<wd l="5203" t="10219" r="5808" b="10378">dataset</wd>

<space/>

</ln>

<ln l="1440" t="10493" r="5794" b="10694" baseLine="10642" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1440" t="10493" r="1843" b="10651">used</wd>

<space/>

<wd l="1906" t="10493" r="2074" b="10646">in</wd>

<space/>

<wd l="2141" t="10493" r="2405" b="10651">the</wd>

<space/>

<wd l="2482" t="10517" r="3082" b="10694">system</wd>

<space/>

<wd l="3144" t="10493" r="3456" b="10651">and</wd>

<space/>

<wd l="3528" t="10493" r="4421" b="10651">evaluation</wd>

<space/>

<wd l="4483" t="10493" r="5098" b="10680">results,</wd>

<space/>

<wd l="5174" t="10546" r="5794" b="10694">respec-</wd>

</ln>

<ln l="1440" t="10762" r="1958" b="10963" baseLine="10910" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1440" t="10762" r="1958" b="10963">tively.</wd>

</ln>

</para>

<para l="1440" t="11222" r="2731" b="11381" alignment="left" spaceBefore="211" lsp="exactly" lspExact="251" language="en">

<ln l="1440" t="11222" r="2731" b="11381" baseLine="11371" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="12">

<wd l="1440" t="11222" r="1704" b="11381">4.1</wd>

<space/>

<wd l="1930" t="11227" r="2381" b="11381">Data</wd>

<space/>

<wd l="2438" t="11222" r="2731" b="11381">Set</wd>

</ln>

</para>

<para l="1440" t="11578" r="5808" b="14174" alignment="justified" spaceBefore="88" lsp="exactly" lspExact="271" language="en">

<ln l="1445" t="11578" r="5808" b="11779" baseLine="11731" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="11578" r="2280" b="11779">Objective</wd>

<space/>

<wd l="2366" t="11578" r="2558" b="11736">of</wd>

<space/>

<wd l="2626" t="11578" r="2890" b="11736">the</wd>

<space/>

<wd l="2986" t="11578" r="3542" b="11736">shared</wd>

<space/>

<wd l="3624" t="11578" r="3979" b="11736">task</wd>

<space/>

<wd l="4061" t="11630" r="4387" b="11736">was</wd>

<space/>

<wd l="4478" t="11602" r="4646" b="11736">to</wd>

<space/>

<wd l="4728" t="11578" r="5405" b="11779">identify</wd>

<space/>

<wd l="5496" t="11578" r="5808" b="11736">and</wd>

<space/>

</ln>

<ln l="1440" t="11851" r="5803" b="12053" baseLine="12000" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1440" t="11851" r="2314" b="12010">normalize</wd>

<space/>

<wd l="2381" t="11851" r="2650" b="12010">the</wd>

<space/>

<wd l="2717" t="11851" r="3187" b="12053">noisy</wd>

<space/>

<wd l="3259" t="11875" r="3586" b="12010">text</wd>

<space/>

<wd l="3658" t="11851" r="3826" b="12005">in</wd>

<space/>

<wd l="3893" t="11875" r="4498" b="12010">tweets.</wd>

<space/>

<wd l="4627" t="11851" r="5054" b="12053">Only</wd>

<space/>

<wd l="5126" t="11851" r="5803" b="12053">training</wd>

<space/>

</ln>

<ln l="1445" t="12120" r="5794" b="12322" baseLine="12274" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="12120" r="2050" b="12278">dataset</wd>

<space/>

<wd l="2122" t="12173" r="2448" b="12278">was</wd>

<space/>

<wd l="2534" t="12120" r="3307" b="12322">provided</wd>

<space/>

<wd l="3379" t="12120" r="3595" b="12322">by</wd>

<space/>

<wd l="3672" t="12120" r="3936" b="12278">the</wd>

<space/>

<wd l="4022" t="12120" r="4584" b="12278">shared</wd>

<space/>

<wd l="4656" t="12120" r="5011" b="12278">task</wd>

<space/>

<wd l="5088" t="12120" r="5794" b="12322">organiz-</wd>

</ln>

<ln l="1445" t="12394" r="5798" b="12595" baseLine="12542" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="12446" r="1738" b="12552">ers.</wd>

<space/>

<wd l="1819" t="12394" r="2160" b="12552">The</wd>

<space/>

<wd l="2213" t="12394" r="2890" b="12595">training</wd>

<space/>

<wd l="2952" t="12394" r="3557" b="12552">dataset</wd>

<space/>

<wd l="3614" t="12394" r="4406" b="12595">comprise</wd>

<space/>

<wd l="4469" t="12394" r="4661" b="12552">of</wd>

<space/>

<wd l="4709" t="12394" r="5189" b="12581">2,950</wd>

<space/>

<wd l="5246" t="12418" r="5798" b="12552">tweets</wd>

<space/>

</ln>

<ln l="1445" t="12662" r="5808" b="12864" baseLine="12816" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="12662" r="1757" b="12821">and</wd>

<space/>

<wd l="1858" t="12715" r="1954" b="12821">a</wd>

<space/>

<wd l="2050" t="12662" r="2438" b="12821">total</wd>

<space/>

<wd l="2544" t="12662" r="2736" b="12821">of</wd>

<space/>

<wd l="2827" t="12667" r="3307" b="12850">3,942</wd>

<space/>

<wd l="3413" t="12662" r="3883" b="12864">noisy</wd>

<space/>

<wd l="3984" t="12662" r="4546" b="12821">tokens</wd>

<space/>

<wd l="4651" t="12715" r="5074" b="12821">were</wd>

<space/>

<wd l="5174" t="12686" r="5808" b="12864">present</wd>

<space/>

</ln>

<ln l="1440" t="12936" r="5808" b="13138" baseLine="13085" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1440" t="12936" r="1613" b="13090">in</wd>

<space/>

<wd l="1714" t="12936" r="1978" b="13094">the</wd>

<space/>

<wd l="2088" t="12936" r="2731" b="13094">dataset.</wd>

<space/>

<wd l="2957" t="12941" r="3139" b="13090">In</wd>

<space/>

<wd l="3245" t="12936" r="3931" b="13094">absence</wd>

<space/>

<wd l="4037" t="12936" r="4229" b="13094">of</wd>

<space/>

<wd l="4320" t="12936" r="4584" b="13094">the</wd>

<space/>

<wd l="4690" t="12936" r="5808" b="13138">development</wd>

<space/>

</ln>

<ln l="1445" t="13205" r="5794" b="13392" baseLine="13354" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="13205" r="2088" b="13392">dataset,</wd>

<space/>

<wd l="2189" t="13258" r="2443" b="13363">we</wd>

<space/>

<wd l="2525" t="13258" r="2818" b="13363">use</wd>

<space/>

<wd l="2904" t="13205" r="3437" b="13363">3-fold</wd>

<space/>

<wd l="3518" t="13258" r="3955" b="13363">cross</wd>

<space/>

<wd l="4046" t="13205" r="4915" b="13363">validation</wd>

<space/>

<wd l="4997" t="13205" r="5256" b="13363">for</wd>

<space/>

<wd l="5333" t="13205" r="5794" b="13363">train-</wd>

</ln>

<ln l="1440" t="13478" r="5794" b="13680" baseLine="13627" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1440" t="13478" r="1714" b="13680">ing</wd>

<space/>

<wd l="1805" t="13478" r="2069" b="13637">the</wd>

<space/>

<wd l="2155" t="13478" r="2741" b="13637">model.</wd>

<space/>

<wd l="2914" t="13478" r="3350" b="13637">Gold</wd>

<space/>

<wd l="3442" t="13478" r="4171" b="13637">standard</wd>

<space/>

<wd l="4258" t="13502" r="4560" b="13637">test</wd>

<space/>

<wd l="4646" t="13478" r="5323" b="13637">datasets</wd>

<space/>

<wd l="5424" t="13531" r="5794" b="13637">con-</wd>

</ln>

<ln l="1440" t="13747" r="5803" b="13934" baseLine="13896" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1440" t="13747" r="1843" b="13906">tains</wd>

<space/>

<wd l="1934" t="13747" r="2390" b="13934">1,967</wd>

<space/>

<wd l="2458" t="13771" r="3058" b="13906">tweets.</wd>

<space/>

<wd l="3144" t="13747" r="3619" b="13906">Table</wd>

<space/>

<wd l="3701" t="13752" r="3763" b="13901">1</wd>

<space/>

<wd l="3845" t="13747" r="4114" b="13906">list</wd>

<space/>

<wd l="4166" t="13747" r="4435" b="13906">the</wd>

<space/>

<wd l="4502" t="13747" r="5237" b="13906">statistics</wd>

<space/>

<wd l="5304" t="13747" r="5496" b="13906">of</wd>

<space/>

<wd l="5539" t="13747" r="5803" b="13906">the</wd>

<space/>

</ln>

<ln l="1445" t="14016" r="2174" b="14174" baseLine="14170" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="14016" r="2174" b="14174">datasets.</wd>

</ln>

</para>

<para l="1440" t="14477" r="3936" b="14678" alignment="left" spaceBefore="210" lsp="exactly" lspExact="251" language="en">

<ln l="1440" t="14477" r="3936" b="14678" baseLine="14630" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="5">

<wd l="1440" t="14477" r="1709" b="14635">4.2</wd>

<space/>

<wd l="1930" t="14477" r="3202" b="14678">Experimental</wd>

<space/>

<wd l="3264" t="14482" r="3936" b="14635">Results</wd>

</ln>

</para>

<para l="1445" t="14837" r="5803" b="15307" alignment="justified" spaceBefore="86" spaceAfter="4" lsp="exactly" lspExact="271" language="en">

<ln l="1445" t="14837" r="5794" b="15038" baseLine="14986" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="14837" r="2467" b="14995">Conditional</wd>

<space/>

<wd l="2549" t="14837" r="3288" b="14995">Random</wd>

<space/>

<wd l="3365" t="14837" r="3811" b="14995">Field</wd>

<space/>

<wd l="3898" t="14837" r="5222" b="15038">(CRF)(Lafferty</wd>

<space/>

<wd l="5309" t="14861" r="5462" b="14995">et</wd>

<space/>

<wd l="5544" t="14837" r="5794" b="15024">al.,</wd>

<space/>

</ln>

<ln l="1445" t="15106" r="5803" b="15307" baseLine="15259" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="15110" r="1939" b="15302">2001)</wd>

<space/>

<wd l="2002" t="15158" r="2328" b="15264">was</wd>

<space/>

<wd l="2386" t="15106" r="2789" b="15264">used</wd>

<space/>

<wd l="2842" t="15158" r="3010" b="15264">as</wd>

<space/>

<wd l="3072" t="15158" r="3168" b="15264">a</wd>

<space/>

<wd l="3216" t="15106" r="3605" b="15264">base</wd>

<space/>

<wd l="3653" t="15106" r="4368" b="15307">learning</wd>

<space/>

<wd l="4426" t="15106" r="5270" b="15307">algorithm</wd>

<space/>

<wd l="5318" t="15106" r="5491" b="15259">in</wd>

<space/>

<wd l="5539" t="15106" r="5803" b="15264">the</wd>

</ln>

</para>

</column>

<column l="6140" t="2588" r="10527" b="15331">

<para l="6144" t="3154" r="10517" b="9350" alignment="justified" spaceBefore="539" lsp="exactly" lspExact="271" language="en">

<ln l="6144" t="3154" r="10502" b="3389" baseLine="3337">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="6144" t="3187" r="6946" b="3389">proposed</wd>

<space/>

<wd l="7003" t="3187" r="7493" b="3346">work.</wd>

<space/>

<wd l="7584" t="3192" r="7872" b="3346">We</wd>

<space/>

<wd l="7930" t="3240" r="8218" b="3346">use</wd>

<space/>

<wd l="8280" t="3187" r="8544" b="3346">the</wd>

<space/>

<wd l="8611" t="3192" r="9259" b="3346">CRF++</wd>

<space/>

</run>

<wd l="9326" t="3154" r="9394" b="3269" underlined="none" subsuperscript="superscript" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">8</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><space/>

<wd l="9470" t="3187" r="9970" b="3346">based</wd>

<space/>

<wd l="10027" t="3187" r="10502" b="3389">pack-</wd>

</run>

</ln>

<ln l="6149" t="3461" r="10502" b="3662" baseLine="3610" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="3514" r="6446" b="3662">age</wd>

<space/>

<wd l="6518" t="3461" r="6778" b="3619">for</wd>

<space/>

<wd l="6845" t="3461" r="7522" b="3662">training</wd>

<space/>

<wd l="7603" t="3461" r="7915" b="3619">and</wd>

<space/>

<wd l="7982" t="3461" r="8611" b="3662">testing.</wd>

<space/>

<wd l="8741" t="3466" r="8962" b="3619">To</wd>

<space/>

<wd l="9043" t="3461" r="9754" b="3619">evaluate</wd>

<space/>

<wd l="9826" t="3461" r="10090" b="3619">the</wd>

<space/>

<wd l="10166" t="3514" r="10502" b="3662">per-</wd>

</ln>

<ln l="6144" t="3730" r="10507" b="3931" baseLine="3883" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="3730" r="6970" b="3888">formance</wd>

<space/>

<wd l="7027" t="3730" r="7214" b="3888">of</wd>

<space/>

<wd l="7253" t="3730" r="7522" b="3888">the</wd>

<space/>

<wd l="7579" t="3754" r="8222" b="3931">system,</wd>

<space/>

<wd l="8290" t="3782" r="8486" b="3888">an</wd>

<space/>

<wd l="8544" t="3730" r="9442" b="3888">evaluation</wd>

<space/>

<wd l="9499" t="3730" r="9974" b="3931">script</wd>

<space/>

<wd l="10032" t="3730" r="10507" b="3931">along</wd>

<space/>

</ln>

<ln l="6144" t="4003" r="10498" b="4205" baseLine="4152" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="4003" r="6533" b="4162">with</wd>

<space/>

<wd l="6614" t="4003" r="6878" b="4162">the</wd>

<space/>

<wd l="6965" t="4003" r="7565" b="4162">dataset</wd>

<space/>

<wd l="7642" t="4056" r="7973" b="4162">was</wd>

<space/>

<wd l="8059" t="4003" r="8832" b="4205">provided</wd>

<space/>

<wd l="8909" t="4003" r="9125" b="4205">by</wd>

<space/>

<wd l="9206" t="4003" r="9475" b="4162">the</wd>

<space/>

<wd l="9557" t="4003" r="10498" b="4205">organizers.</wd>

<space/>

</ln>

<ln l="6144" t="4272" r="10507" b="4474" baseLine="4421" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="4277" r="6432" b="4430">We</wd>

<space/>

<wd l="6518" t="4272" r="7224" b="4474">perform</wd>

<space/>

<wd l="7315" t="4272" r="7843" b="4430">3-fold</wd>

<space/>

<wd l="7934" t="4272" r="9317" b="4430">cross-validation</wd>

<space/>

<wd l="9403" t="4272" r="10253" b="4474">technique</wd>

<space/>

<wd l="10339" t="4296" r="10507" b="4430">to</wd>

<space/>

</ln>

<ln l="6149" t="4546" r="10507" b="4747" baseLine="4694" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="4546" r="6922" b="4704">fine-tune</wd>

<space/>

<wd l="6998" t="4546" r="7262" b="4704">the</wd>

<space/>

<wd l="7349" t="4570" r="7987" b="4747">system,</wd>

<space/>

<wd l="8083" t="4546" r="8395" b="4704">and</wd>

<space/>

<wd l="8467" t="4546" r="9144" b="4747">identify</wd>

<space/>

<wd l="9221" t="4546" r="9490" b="4704">the</wd>

<space/>

<wd l="9562" t="4546" r="9917" b="4704">best</wd>

<space/>

<wd l="9994" t="4546" r="10507" b="4747">fitting</wd>

<space/>

</ln>

<ln l="6144" t="4814" r="10512" b="5016" baseLine="4963" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="4814" r="6749" b="4973">feature</wd>

<space/>

<wd l="6835" t="4814" r="7963" b="4973">combination.</wd>

<space/>

<wd l="8117" t="4814" r="8458" b="4973">The</wd>

<space/>

<wd l="8534" t="4814" r="9638" b="5016">performance</wd>

<space/>

<wd l="9720" t="4814" r="9912" b="4973">of</wd>

<space/>

<wd l="9984" t="4814" r="10512" b="4973">3-fold</wd>

<space/>

</ln>

<ln l="6149" t="5083" r="10512" b="5285" baseLine="5237" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="5136" r="6586" b="5242">cross</wd>

<space/>

<wd l="6667" t="5083" r="7536" b="5242">validation</wd>

<space/>

<wd l="7613" t="5083" r="8587" b="5285">experiment</wd>

<space/>

<wd l="8659" t="5083" r="9168" b="5285">yields</wd>

<space/>

<wd l="9250" t="5083" r="9518" b="5242">the</wd>

<space/>

<wd l="9590" t="5088" r="10512" b="5242">F-measure</wd>

<space/>

</ln>

<ln l="6149" t="5357" r="10502" b="5558" baseLine="5506" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="5357" r="6341" b="5515">of</wd>

<space/>

<wd l="6403" t="5362" r="7066" b="5515">92.21%</wd>

<space/>

<wd l="7142" t="5357" r="7402" b="5515">for</wd>

<space/>

<wd l="7469" t="5357" r="8626" b="5515">identification</wd>

<space/>

<wd l="8693" t="5357" r="9422" b="5558">problem</wd>

<space/>

<wd l="9504" t="5357" r="9821" b="5554">(i.e.</wd>

<space/>

<wd l="9960" t="5357" r="10502" b="5515">denot-</wd>

</ln>

<ln l="6144" t="5626" r="10517" b="5827" baseLine="5779" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="5626" r="6422" b="5827">ing</wd>

<space/>

<wd l="6518" t="5626" r="6898" b="5827">only</wd>

<space/>

<wd l="6989" t="5626" r="7258" b="5784">the</wd>

<space/>

<wd l="7354" t="5626" r="8261" b="5784">candidates</wd>

<space/>

<wd l="8357" t="5626" r="8616" b="5784">for</wd>

<space/>

<wd l="8702" t="5626" r="10027" b="5822">normalization).</wd>

<space/>

<wd l="10210" t="5630" r="10517" b="5784">For</wd>

<space/>

</ln>

<ln l="6144" t="5899" r="10502" b="6058" baseLine="6048" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="5899" r="6413" b="6058">the</wd>

<space/>

<wd l="6466" t="5923" r="6768" b="6058">test</wd>

<space/>

<wd l="6830" t="5923" r="7066" b="6058">set</wd>

<space/>

<wd l="7118" t="5899" r="7238" b="6058">it</wd>

<space/>

<wd l="7301" t="5899" r="7824" b="6058">shows</wd>

<space/>

<wd l="7886" t="5899" r="8150" b="6058">the</wd>

<space/>

<wd l="8203" t="5904" r="9125" b="6058">F-measure</wd>

<space/>

<wd l="9182" t="5899" r="9374" b="6058">of</wd>

<space/>

<wd l="9422" t="5899" r="10128" b="6058">86.63%.</wd>

<space/>

<wd l="10210" t="5899" r="10502" b="6053">Af-</wd>

</ln>

<ln l="6144" t="6168" r="10512" b="6370" baseLine="6322" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="6192" r="6379" b="6326">ter</wd>

<space/>

<wd l="6432" t="6168" r="7387" b="6370">identifying</wd>

<space/>

<wd l="7450" t="6168" r="7714" b="6326">the</wd>

<space/>

<wd l="7776" t="6168" r="8683" b="6326">candidates</wd>

<space/>

<wd l="8755" t="6168" r="8942" b="6326">of</wd>

<space/>

<wd l="8986" t="6168" r="10200" b="6326">normalization</wd>

<space/>

<wd l="10258" t="6221" r="10512" b="6326">we</wd>

<space/>

</ln>

<ln l="6149" t="6442" r="10502" b="6643" baseLine="6590" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="6442" r="6629" b="6643">apply</wd>

<space/>

<wd l="6701" t="6442" r="7531" b="6600">heuristics</wd>

<space/>

<wd l="7613" t="6466" r="7776" b="6600">to</wd>

<space/>

<wd l="7853" t="6442" r="8558" b="6643">perform</wd>

<space/>

<wd l="8626" t="6442" r="9883" b="6600">normalization.</wd>

<space/>

<wd l="10013" t="6442" r="10502" b="6600">Rules</wd>

<space/>

</ln>

<ln l="6144" t="6710" r="10512" b="6912" baseLine="6864" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="6763" r="6566" b="6869">were</wd>

<space/>

<wd l="6653" t="6710" r="7296" b="6912">applied</wd>

<space/>

<wd l="7378" t="6710" r="8232" b="6912">according</wd>

<space/>

<wd l="8314" t="6734" r="8482" b="6869">to</wd>

<space/>

<wd l="8563" t="6710" r="8966" b="6869">their</wd>

<space/>

<wd l="9048" t="6763" r="10070" b="6912">appearance.</wd>

<space/>

<wd l="10224" t="6715" r="10512" b="6869">We</wd>

<space/>

</ln>

<ln l="6144" t="6984" r="10502" b="7186" baseLine="7133" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="6984" r="6547" b="7142">have</wd>

<space/>

<wd l="6619" t="6984" r="7022" b="7142">tried</wd>

<space/>

<wd l="7094" t="6984" r="7723" b="7142">various</wd>

<space/>

<wd l="7805" t="6984" r="8894" b="7142">combination</wd>

<space/>

<wd l="8966" t="6984" r="9158" b="7142">of</wd>

<space/>

<wd l="9216" t="6984" r="9552" b="7142">rule</wd>

<space/>

<wd l="9638" t="7037" r="10502" b="7186">sequences</wd>

<space/>

</ln>

<ln l="6149" t="7253" r="10512" b="7454" baseLine="7402" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="7253" r="6461" b="7411">and</wd>

<space/>

<wd l="6509" t="7253" r="7018" b="7411">found</wd>

<space/>

<wd l="7061" t="7253" r="7392" b="7411">that</wd>

<space/>

<wd l="7435" t="7253" r="7699" b="7411">the</wd>

<space/>

<wd l="7747" t="7253" r="8222" b="7411">listed</wd>

<space/>

<wd l="8275" t="7306" r="9067" b="7454">sequence</wd>

<space/>

<wd l="9110" t="7253" r="9250" b="7411">is</wd>

<space/>

<wd l="9302" t="7253" r="9571" b="7411">the</wd>

<space/>

<wd l="9619" t="7306" r="9931" b="7411">one</wd>

<space/>

<wd l="9979" t="7253" r="10512" b="7411">which</wd>

<space/>

</ln>

<ln l="6149" t="7526" r="10512" b="7728" baseLine="7675" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="7526" r="6590" b="7728">gives</wd>

<space/>

<wd l="6682" t="7579" r="6869" b="7685">us</wd>

<space/>

<wd l="6960" t="7526" r="7459" b="7685">better</wd>

<space/>

<wd l="7541" t="7526" r="8688" b="7728">performance.</wd>

<space/>

<wd l="8851" t="7526" r="9384" b="7685">While</wd>

<space/>

<wd l="9470" t="7579" r="9725" b="7685">we</wd>

<space/>

<wd l="9806" t="7526" r="10512" b="7728">perform</wd>

<space/>

</ln>

<ln l="6149" t="7795" r="10498" b="7997" baseLine="7944" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="7795" r="6682" b="7954">3-fold</wd>

<space/>

<wd l="6782" t="7848" r="7219" b="7954">cross</wd>

<space/>

<wd l="7325" t="7795" r="8194" b="7954">validation</wd>

<space/>

<wd l="8294" t="7848" r="8544" b="7954">we</wd>

<space/>

<wd l="8650" t="7795" r="9192" b="7954">obtain</wd>

<space/>

<wd l="9293" t="7795" r="9557" b="7954">the</wd>

<space/>

<wd l="9658" t="7795" r="10498" b="7997">precision,</wd>

<space/>

</ln>

<ln l="6144" t="8064" r="10502" b="8251" baseLine="8218" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="8064" r="6629" b="8222">recall</wd>

<space/>

<wd l="6720" t="8064" r="7032" b="8222">and</wd>

<space/>

<wd l="7114" t="8069" r="8035" b="8222">F-measure</wd>

<space/>

<wd l="8117" t="8064" r="8664" b="8222">values</wd>

<space/>

<wd l="8760" t="8064" r="8947" b="8222">of</wd>

<space/>

<wd l="9029" t="8064" r="9734" b="8251">88.59%,</wd>

<space/>

<wd l="9840" t="8069" r="10502" b="8222">74.92%</wd>

<space/>

</ln>

<ln l="6149" t="8338" r="10512" b="8539" baseLine="8486" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="8338" r="6461" b="8496">and</wd>

<space/>

<wd l="6552" t="8342" r="7258" b="8525">81.19%,</wd>

<space/>

<wd l="7354" t="8338" r="8429" b="8539">respectively.</wd>

<space/>

<wd l="8587" t="8338" r="9202" b="8539">Finally</wd>

<space/>

<wd l="9283" t="8390" r="9538" b="8496">we</wd>

<space/>

<wd l="9624" t="8338" r="10166" b="8496">obtain</wd>

<space/>

<wd l="10243" t="8338" r="10512" b="8496">the</wd>

<space/>

</ln>

<ln l="6144" t="8606" r="10498" b="8808" baseLine="8760" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="8606" r="6989" b="8808">precision,</wd>

<space/>

<wd l="7056" t="8606" r="7536" b="8765">recall</wd>

<space/>

<wd l="7598" t="8606" r="7910" b="8765">and</wd>

<space/>

<wd l="7963" t="8611" r="8885" b="8765">F-measure</wd>

<space/>

<wd l="8942" t="8606" r="9485" b="8765">values</wd>

<space/>

<wd l="9552" t="8606" r="9744" b="8765">of</wd>

<space/>

<wd l="9787" t="8606" r="10498" b="8794">90.26%,</wd>

<space/>

</ln>

<ln l="6144" t="8880" r="10512" b="9082" baseLine="9029" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="8885" r="6811" b="9038">71.91%</wd>

<space/>

<wd l="6869" t="8880" r="7181" b="9038">and</wd>

<space/>

<wd l="7234" t="8880" r="7939" b="9067">80.05%,</wd>

<space/>

<wd l="7997" t="8880" r="9072" b="9082">respectively.</wd>

<space/>

<wd l="9149" t="8880" r="9782" b="9038">Results</wd>

<space/>

<wd l="9840" t="8880" r="10032" b="9038">of</wd>

<space/>

<wd l="10061" t="8880" r="10512" b="9038">these</wd>

<space/>

</ln>

<ln l="6149" t="9149" r="9115" b="9350" baseLine="9302" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="9149" r="7200" b="9350">experiments</wd>

<space/>

<wd l="7267" t="9202" r="7531" b="9307">are</wd>

<space/>

<wd l="7594" t="9149" r="8150" b="9307">shown</wd>

<space/>

<wd l="8203" t="9149" r="8376" b="9302">in</wd>

<space/>

<wd l="8429" t="9149" r="8904" b="9307">Table</wd>

<space/>

<wd l="8966" t="9154" r="9115" b="9307">2.</wd>

</ln>

</para>

<para l="6144" t="9427" r="10512" b="13109" alignment="justified" spaceBefore="3" fli="216" lsp="exactly" lspExact="271" language="en">

<ln l="6365" t="9427" r="10507" b="9629" baseLine="9576" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6365" t="9432" r="6648" b="9586">We</wd>

<space/>

<wd l="6739" t="9427" r="7349" b="9629">closely</wd>

<space/>

<wd l="7445" t="9427" r="8107" b="9629">analyze</wd>

<space/>

<wd l="8194" t="9427" r="8458" b="9586">the</wd>

<space/>

<wd l="8549" t="9480" r="9048" b="9586">errors</wd>

<space/>

<wd l="9144" t="9427" r="10210" b="9586">encountered</wd>

<space/>

<wd l="10291" t="9427" r="10507" b="9629">by</wd>

<space/>

</ln>

<ln l="6149" t="9701" r="10512" b="9902" baseLine="9850" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="9754" r="6442" b="9859">our</wd>

<space/>

<wd l="6523" t="9725" r="7162" b="9902">system.</wd>

<space/>

<wd l="7310" t="9706" r="7598" b="9859">We</wd>

<space/>

<wd l="7680" t="9701" r="8462" b="9859">observed</wd>

<space/>

<wd l="8534" t="9701" r="8866" b="9859">that</wd>

<space/>

<wd l="8942" t="9754" r="9418" b="9902">many</wd>

<space/>

<wd l="9504" t="9754" r="10003" b="9859">errors</wd>

<space/>

<wd l="10085" t="9754" r="10512" b="9859">were</wd>

<space/>

</ln>

<ln l="6149" t="9970" r="10502" b="10128" baseLine="10118" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="9970" r="6461" b="10128">due</wd>

<space/>

<wd l="6504" t="9994" r="6672" b="10128">to</wd>

<space/>

<wd l="6720" t="9970" r="6989" b="10128">the</wd>

<space/>

<wd l="7032" t="9970" r="7810" b="10128">incorrect</wd>

<space/>

<wd l="7853" t="9970" r="9005" b="10128">identification</wd>

<space/>

<wd l="9058" t="9970" r="9245" b="10128">of</wd>

<space/>

<wd l="9278" t="9970" r="9542" b="10128">the</wd>

<space/>

<wd l="9595" t="9970" r="10502" b="10128">candidates</wd>

<space/>

</ln>

<ln l="6144" t="10238" r="10498" b="10440" baseLine="10392" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="10238" r="6475" b="10397">that</wd>

<space/>

<wd l="6547" t="10238" r="6960" b="10397">need</wd>

<space/>

<wd l="7032" t="10262" r="7200" b="10397">to</wd>

<space/>

<wd l="7277" t="10238" r="7483" b="10397">be</wd>

<space/>

<wd l="7560" t="10238" r="8582" b="10397">normalized.</wd>

<space/>

<wd l="8722" t="10238" r="9062" b="10397">The</wd>

<space/>

<wd l="9115" t="10238" r="9854" b="10440">jumbled</wd>

<space/>

<wd l="9926" t="10238" r="10498" b="10426">words,</wd>

<space/>

</ln>

<ln l="6149" t="10512" r="10502" b="10714" baseLine="10661">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="6149" t="10565" r="6451" b="10714">e.g.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="6629" t="10512" r="7090" b="10694">‘liek’,</wd>

<space/>

<wd l="7219" t="10512" r="7757" b="10670">’whta’</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="7843" t="10536" r="8136" b="10670">etc.</wd>

<space/>

<wd l="8280" t="10565" r="8702" b="10670">were</wd>

<space/>

<wd l="8784" t="10536" r="9062" b="10670">not</wd>

<space/>

<wd l="9139" t="10512" r="9874" b="10714">properly</wd>

<space/>

<wd l="9955" t="10565" r="10502" b="10714">recog-</wd>

</run>

</ln>

<ln l="6144" t="10781" r="10512" b="10982" baseLine="10934" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="10781" r="6662" b="10939">nized.</wd>

<space/>

<wd l="6821" t="10781" r="7253" b="10939">With</wd>

<space/>

<wd l="7334" t="10834" r="7781" b="10939">more</wd>

<space/>

<wd l="7867" t="10805" r="8587" b="10939">accurate</wd>

<space/>

<wd l="8674" t="10781" r="9826" b="10939">identification</wd>

<space/>

<wd l="9917" t="10805" r="10512" b="10982">system</wd>

<space/>

</ln>

<ln l="6144" t="11054" r="10512" b="11256" baseLine="11203" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="11107" r="6398" b="11213">we</wd>

<space/>

<wd l="6442" t="11054" r="6989" b="11213">would</wd>

<space/>

<wd l="7032" t="11054" r="7435" b="11213">have</wd>

<space/>

<wd l="7483" t="11054" r="8246" b="11213">achieved</wd>

<space/>

<wd l="8290" t="11054" r="8789" b="11213">better</wd>

<space/>

<wd l="8827" t="11054" r="9355" b="11213">result.</wd>

<space/>

<wd l="9432" t="11059" r="9734" b="11213">For</wd>

<space/>

<wd l="9778" t="11054" r="10512" b="11256">example</wd>

<space/>

</ln>

<ln l="6144" t="11323" r="10502" b="11525" baseLine="11477" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="11323" r="6317" b="11477">in</wd>

<space/>

<wd l="6394" t="11376" r="6763" b="11482">case</wd>

<space/>

<wd l="6840" t="11323" r="7032" b="11482">of</wd>

<space/>

<wd l="7114" t="11328" r="7589" b="11482">100%</wd>

<space/>

<wd l="7670" t="11323" r="8141" b="11525">noisy</wd>

<space/>

<wd l="8213" t="11347" r="8539" b="11482">text</wd>

<space/>

<wd l="8611" t="11323" r="9806" b="11510">identification,</wd>

<space/>

<wd l="9893" t="11376" r="10147" b="11482">we</wd>

<space/>

<wd l="10224" t="11323" r="10502" b="11482">ob-</wd>

</ln>

<ln l="6144" t="11597" r="10498" b="11755" baseLine="11746" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="11597" r="6682" b="11755">tained</wd>

<space/>

<wd l="6725" t="11650" r="6926" b="11755">an</wd>

<space/>

<wd l="6970" t="11597" r="7685" b="11755">increase</wd>

<space/>

<wd l="7733" t="11597" r="7925" b="11755">of</wd>

<space/>

<wd l="7958" t="11597" r="8506" b="11755">3.75%</wd>

<space/>

<wd l="8558" t="11597" r="8731" b="11750">in</wd>

<space/>

<wd l="8774" t="11650" r="9067" b="11755">our</wd>

<space/>

<wd l="9110" t="11597" r="9490" b="11755">final</wd>

<space/>

<wd l="9538" t="11602" r="10498" b="11755">F-measure.</wd>

<space/>

</ln>

<ln l="6144" t="11866" r="10502" b="12067" baseLine="12019" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="11870" r="6451" b="12024">For</wd>

<space/>

<wd l="6499" t="11866" r="7714" b="12024">normalization</wd>

<space/>

<wd l="7771" t="11918" r="8227" b="12053">error,</wd>

<space/>

<wd l="8294" t="11918" r="8587" b="12024">our</wd>

<space/>

<wd l="8635" t="11866" r="9293" b="12024">method</wd>

<space/>

<wd l="9350" t="11866" r="10099" b="12067">arguably</wd>

<space/>

<wd l="10162" t="11866" r="10502" b="12067">lags</wd>

<space/>

</ln>

<ln l="6144" t="12139" r="10502" b="12341" baseLine="12288" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="12139" r="6739" b="12298">behind</wd>

<space/>

<wd l="6826" t="12139" r="6998" b="12293">in</wd>

<space/>

<wd l="7085" t="12163" r="7406" b="12298">two</wd>

<space/>

<wd l="7498" t="12139" r="8050" b="12298">fronts:</wd>

<space/>

<wd l="8203" t="12144" r="8362" b="12336">a)</wd>

<space/>

<wd l="8462" t="12139" r="9466" b="12341">ambiguities</wd>

<space/>

<wd l="9562" t="12139" r="9734" b="12293">in</wd>

<space/>

<wd l="9821" t="12139" r="10502" b="12298">normal-</wd>

</ln>

<ln l="6144" t="12408" r="10507" b="12610" baseLine="12557" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="12408" r="6739" b="12566">ization</wd>

<space/>

<wd l="6802" t="12408" r="7114" b="12566">and</wd>

<space/>

<wd l="7171" t="12408" r="7344" b="12605">b)</wd>

<space/>

<wd l="7416" t="12432" r="8525" b="12610">many-to-one</wd>

<space/>

<wd l="8587" t="12408" r="9346" b="12610">mapping</wd>

<space/>

<wd l="9413" t="12461" r="9912" b="12566">cases.</wd>

<space/>

<wd l="10003" t="12413" r="10507" b="12610">Many</wd>

<space/>

</ln>

<ln l="6149" t="12682" r="10512" b="12883" baseLine="12830" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="12682" r="6341" b="12840">of</wd>

<space/>

<wd l="6398" t="12682" r="6845" b="12840">these</wd>

<space/>

<wd l="6912" t="12734" r="7286" b="12883">may</wd>

<space/>

<wd l="7358" t="12682" r="7565" b="12840">be</wd>

<space/>

<wd l="7632" t="12682" r="8323" b="12840">reduced</wd>

<space/>

<wd l="8395" t="12682" r="8606" b="12883">by</wd>

<space/>

<wd l="8683" t="12682" r="9283" b="12840">careful</wd>

<space/>

<wd l="9360" t="12682" r="9926" b="12883">design</wd>

<space/>

<wd l="9998" t="12682" r="10190" b="12840">of</wd>

<space/>

<wd l="10243" t="12682" r="10512" b="12840">the</wd>

<space/>

</ln>

<ln l="6144" t="12950" r="7421" b="13109" baseLine="13099" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="12950" r="6893" b="13109">heuristic</wd>

<space/>

<wd l="6950" t="12950" r="7421" b="13109">rules.</wd>

</ln>

</para>

<para l="6149" t="13435" r="7642" b="13608" alignment="left" spaceBefore="230" lsp="exactly" lspExact="273" language="en">

<ln l="6149" t="13435" r="7642" b="13608" baseLine="13598" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="12">

<wd l="6149" t="13440" r="6259" b="13608">5</wd>

<space/>

<wd l="6514" t="13435" r="7642" b="13608">Conclusion</wd>

</ln>

</para>

<para l="6144" t="13882" r="10512" b="14899" alignment="justified" spaceBefore="160" spaceAfter="129" lsp="exactly" lspExact="271" language="en">

<ln l="6144" t="13882" r="10512" b="14083" baseLine="14035" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="13886" r="6326" b="14035">In</wd>

<space/>

<wd l="6394" t="13882" r="6701" b="14040">this</wd>

<space/>

<wd l="6773" t="13934" r="7262" b="14083">paper</wd>

<space/>

<wd l="7325" t="13934" r="7579" b="14040">we</wd>

<space/>

<wd l="7646" t="13882" r="8050" b="14040">have</wd>

<space/>

<wd l="8117" t="13882" r="8846" b="14083">reported</wd>

<space/>

<wd l="8914" t="13934" r="9206" b="14040">our</wd>

<space/>

<wd l="9264" t="13882" r="9787" b="14040">works</wd>

<space/>

<wd l="9864" t="13882" r="10190" b="14040">that</wd>

<space/>

<wd l="10258" t="13934" r="10512" b="14040">we</wd>

<space/>

</ln>

<ln l="6149" t="14155" r="10502" b="14357" baseLine="14304" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="14155" r="6754" b="14314">carried</wd>

<space/>

<wd l="6811" t="14179" r="7085" b="14314">out</wd>

<space/>

<wd l="7142" t="14208" r="7310" b="14314">as</wd>

<space/>

<wd l="7373" t="14184" r="7714" b="14357">part</wd>

<space/>

<wd l="7771" t="14155" r="7958" b="14314">of</wd>

<space/>

<wd l="8006" t="14208" r="8294" b="14314">our</wd>

<space/>

<wd l="8342" t="14155" r="9446" b="14357">participation</wd>

<space/>

<wd l="9499" t="14155" r="9672" b="14309">in</wd>

<space/>

<wd l="9725" t="14155" r="9989" b="14314">the</wd>

<space/>

<wd l="10042" t="14155" r="10502" b="14314">Twit-</wd>

</ln>

<ln l="6144" t="14424" r="10502" b="14582" baseLine="14578" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="14448" r="6379" b="14582">ter</wd>

<space/>

<wd l="6461" t="14448" r="6787" b="14582">text</wd>

<space/>

<wd l="6869" t="14424" r="8078" b="14582">normalization</wd>

<space/>

<wd l="8170" t="14424" r="8731" b="14582">shared</wd>

<space/>

<wd l="8818" t="14424" r="9211" b="14582">task.</wd>

<space/>

<wd l="9374" t="14429" r="9662" b="14582">We</wd>

<space/>

<wd l="9744" t="14424" r="10147" b="14582">have</wd>

<space/>

<wd l="10238" t="14424" r="10502" b="14582">de-</wd>

</ln>

<ln l="6144" t="14698" r="10507" b="14899" baseLine="14846" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="14698" r="6835" b="14899">veloped</wd>

<space/>

<wd l="6931" t="14750" r="7027" b="14856">a</wd>

<space/>

<wd l="7118" t="14698" r="7685" b="14899">hybrid</wd>

<space/>

<wd l="7786" t="14722" r="8386" b="14899">system</wd>

<space/>

<wd l="8477" t="14698" r="9010" b="14856">where</wd>

<space/>

<wd l="9106" t="14698" r="9274" b="14851">in</wd>

<space/>

<wd l="9365" t="14698" r="9634" b="14856">the</wd>

<space/>

<wd l="9730" t="14698" r="10070" b="14856">first</wd>

<space/>

<wd l="10171" t="14722" r="10507" b="14899">step</wd>

</ln>

</para>

<rulerline l="6140" t="15048" r="7344" b="15048" type="single" width="10" color="000000"/>

<para l="6403" t="15101" r="8635" b="15302" alignment="left" li="216" spaceBefore="58" spaceAfter="14" lsp="exactly" lspExact="207" language="en">

<ln l="6403" t="15101" r="8635" b="15302" baseLine="15256">

<wd l="6403" t="15101" r="8635" b="15302"><run underlined="none" subsuperscript="none" fontSize="650" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">8</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">http://taku910.github.io/crfpp/</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

</column>

</section>

<dd l="1431" t="15746" r="6134" b="15975">

<para l="5771" t="15792" r="6134" b="15946" alignment="right" lsp="exactly" lspExact="223" language="en">

<ln l="5837" t="15792" r="6130" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="950" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="13">

<wd l="5837" t="15792" r="6130" b="15946">109</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4316.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1440" marginTop="1240" marginRight="1378" marginBottom="1292" offsetX="-20" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1440" t="1240" r="10531" b="3698">

<column l="1440" t="1240" r="10531" b="3698">

<table l="2556" t="1258" r="9379" b="2717" alignment="left" li="1116" ri="1152" spaceBefore="18" spaceAfter="181">

<topBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<gridTable>

<gridCol>1528</gridCol>

<gridCol>1013</gridCol>

<gridCol>1095</gridCol>

<gridCol>820</gridCol>

<gridCol>1229</gridCol>

<gridCol>1138</gridCol>

<gridRow>302</gridRow>

<gridRow>298</gridRow>

<gridRow>293</gridRow>

<gridRow>297</gridRow>

<gridRow>269</gridRow>

</gridTable>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="5"/>

<rightBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="3120" t="1306" r="3557" b="1459" alignment="centered" spaceAfter="44" lsp="exactly" lspExact="253" language="en">

<ln l="3120" t="1306" r="3557" b="1459" baseLine="1450" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="3120" t="1306" r="3557" b="1459">Task</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<rightBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="4238" t="1306" r="4944" b="1459" alignment="centered" spaceAfter="44" lsp="exactly" lspExact="253" language="en">

<ln l="4238" t="1306" r="4944" b="1459" baseLine="1450" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="4238" t="1306" r="4944" b="1459">Dataset</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<rightBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="5213" t="1301" r="6072" b="1459" alignment="centered" spaceAfter="44" lsp="exactly" lspExact="253" language="en">

<ln l="5213" t="1301" r="6072" b="1459" baseLine="1450" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="5213" t="1301" r="6072" b="1459">Precision</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<rightBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="6317" t="1306" r="6888" b="1459" alignment="centered" spaceAfter="44" lsp="exactly" lspExact="253" language="en">

<ln l="6317" t="1306" r="6888" b="1459" baseLine="1450" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6317" t="1306" r="6888" b="1459">Recall</wd>

</ln>

</para>

</cell>

<cell gridColFrom="4" gridColTill="4" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<rightBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="7133" t="1306" r="8122" b="1459" alignment="centered" spaceAfter="44" lsp="exactly" lspExact="253" language="en">

<ln l="7133" t="1306" r="8122" b="1459" baseLine="1450" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="7133" t="1306" r="8122" b="1459">F-measure</wd>

</ln>

</para>

</cell>

<cell gridColFrom="5" gridColTill="5" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="8362" t="1301" r="9240" b="1502" alignment="centered" spaceAfter="44" lsp="exactly" lspExact="253" language="en">

<ln l="8362" t="1301" r="9240" b="1502" baseLine="1450" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="8362" t="1301" r="9240" b="1502">Accuracy</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="1" gridRowTill="2" alignment="left" verticalAlignment="bottom">

<topBorder type="single" width="5"/>

<rightBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="2755" t="1891" r="3922" b="2050" alignment="centered" spaceBefore="268" spaceAfter="46" lsp="exactly" lspExact="271" language="en">

<ln l="2755" t="1891" r="3922" b="2050" baseLine="2040" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="2755" t="1891" r="3922" b="2050">Identification</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<rightBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="4205" t="1618" r="4973" b="1819" alignment="centered" spaceAfter="29" lsp="exactly" lspExact="245" language="en">

<ln l="4205" t="1618" r="4973" b="1819" baseLine="1771" italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="4205" t="1618" r="4733" b="1819">3-fold</wd>

<space/>

<wd l="4786" t="1670" r="4973" b="1776">cv</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<rightBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="5405" t="1618" r="5866" b="1776" alignment="left" spaceAfter="27" lsp="exactly" lspExact="270" language="en">

<tabs position="5405"/>

<ln l="5405" t="1618" r="5866" b="1776" baseLine="1771" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="5405" t="1618" r="5866" b="1776">89.51</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<rightBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="6360" t="1618" r="6840" b="1776" alignment="left" spaceAfter="27" lsp="exactly" lspExact="270" language="en">

<tabs position="6360"/>

<ln l="6360" t="1618" r="6840" b="1776" baseLine="1771" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6360" t="1618" r="6840" b="1776">95.08</wd>

</ln>

</para>

</cell>

<cell gridColFrom="4" gridColTill="4" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<rightBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="7387" t="1622" r="7848" b="1776" alignment="left" spaceAfter="27" lsp="exactly" lspExact="270" language="en">

<tabs position="7387"/>

<ln l="7387" t="1622" r="7848" b="1776" baseLine="1771" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="7387" t="1622" r="7848" b="1776">92.21</wd>

</ln>

</para>

</cell>

<cell gridColFrom="5" gridColTill="5" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="8563" t="1622" r="9043" b="1776" alignment="centered" spaceAfter="27" lsp="exactly" lspExact="270" language="en">

<ln l="8563" t="1622" r="9043" b="1776" baseLine="1771" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="8563" t="1622" r="9043" b="1776">98.70</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<rightBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="4440" t="1901" r="4747" b="2050" alignment="centered" spaceAfter="48" lsp="exactly" lspExact="239" language="en">

<ln l="4440" t="1901" r="4747" b="2050" baseLine="2040" italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="4440" t="1925" r="4747" b="2050">test</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<rightBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="5400" t="1896" r="5880" b="2050" alignment="left" spaceAfter="46" lsp="exactly" lspExact="241" language="en">

<tabs position="5400"/>

<ln l="5400" t="1896" r="5880" b="2050" baseLine="2040" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="5400" t="1896" r="5880" b="2050">93.08</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<rightBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="6365" t="1896" r="6826" b="2050" alignment="left" spaceAfter="46" lsp="exactly" lspExact="241" language="en">

<tabs position="6365"/>

<ln l="6365" t="1896" r="6826" b="2050" baseLine="2040" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="6365" t="1896" r="6826" b="2050">81.01</wd>

</ln>

</para>

</cell>

<cell gridColFrom="4" gridColTill="4" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<rightBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="7392" t="1891" r="7858" b="2050" alignment="left" spaceAfter="46" lsp="exactly" lspExact="241" language="en">

<tabs position="7392"/>

<ln l="7392" t="1891" r="7858" b="2050" baseLine="2040" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="7392" t="1891" r="7858" b="2050">86.63</wd>

</ln>

</para>

</cell>

<cell gridColFrom="5" gridColTill="5" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="8563" t="1891" r="9043" b="2050" alignment="centered" spaceAfter="46" lsp="exactly" lspExact="241" language="en">

<ln l="8563" t="1891" r="9043" b="2050" baseLine="2040" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="8563" t="1891" r="9043" b="2050">97.64</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="3" gridRowTill="4" alignment="left" verticalAlignment="bottom">

<topBorder type="single" width="5"/>

<rightBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="2707" t="2482" r="3970" b="2640" alignment="centered" spaceBefore="267" spaceAfter="18" lsp="exactly" lspExact="271" language="en">

<ln l="2707" t="2482" r="3970" b="2640" baseLine="2630" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="2707" t="2482" r="3970" b="2640">Normalization</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<rightBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="4205" t="2208" r="4973" b="2410" alignment="centered" spaceAfter="29" lsp="exactly" lspExact="245" language="en">

<ln l="4205" t="2208" r="4973" b="2410" baseLine="2362" italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="4205" t="2208" r="4733" b="2410">3-fold</wd>

<space/>

<wd l="4786" t="2261" r="4973" b="2366">cv</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<rightBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="5405" t="2208" r="5880" b="2366" alignment="left" spaceAfter="27" lsp="exactly" lspExact="270" language="en">

<tabs position="5405"/>

<ln l="5405" t="2208" r="5880" b="2366" baseLine="2362" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="5405" t="2208" r="5880" b="2366">88.59</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<rightBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="6355" t="2213" r="6845" b="2366" alignment="left" spaceAfter="27" lsp="exactly" lspExact="270" language="en">

<tabs position="6355"/>

<ln l="6355" t="2213" r="6845" b="2366" baseLine="2362" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6355" t="2213" r="6845" b="2366">74.92</wd>

</ln>

</para>

</cell>

<cell gridColFrom="4" gridColTill="4" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<rightBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="7392" t="2213" r="7862" b="2366" alignment="left" spaceAfter="27" lsp="exactly" lspExact="270" language="en">

<tabs position="7392"/>

<ln l="7392" t="2213" r="7862" b="2366" baseLine="2362" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="7392" t="2213" r="7862" b="2366">81.19</wd>

</ln>

</para>

</cell>

<cell gridColFrom="5" gridColTill="5" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="8770" t="2213" r="8827" b="2362" alignment="centered" spaceAfter="27" lsp="exactly" lspExact="270" language="en">

<ln l="8770" t="2213" r="8827" b="2362" baseLine="2362" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="8770" t="2304" r="8827" b="2323">-</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<rightBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="4440" t="2491" r="4747" b="2640" alignment="centered" spaceAfter="20" lsp="exactly" lspExact="239" language="en">

<ln l="4440" t="2491" r="4747" b="2640" baseLine="2630" italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="4440" t="2515" r="4747" b="2640">test</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<rightBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="5400" t="2482" r="5885" b="2640" alignment="left" spaceAfter="18" lsp="exactly" lspExact="241" language="en">

<tabs position="5400"/>

<ln l="5400" t="2482" r="5885" b="2640" baseLine="2630" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="5400" t="2482" r="5885" b="2640">90.26</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<rightBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="6355" t="2486" r="6826" b="2640" alignment="left" spaceAfter="18" lsp="exactly" lspExact="241" language="en">

<tabs position="6355"/>

<ln l="6355" t="2486" r="6826" b="2640" baseLine="2630" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6355" t="2486" r="6826" b="2640">71.91</wd>

</ln>

</para>

</cell>

<cell gridColFrom="4" gridColTill="4" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<rightBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="7392" t="2482" r="7858" b="2640" alignment="left" spaceAfter="18" lsp="exactly" lspExact="241" language="en">

<tabs position="7392"/>

<ln l="7392" t="2482" r="7858" b="2640" baseLine="2630" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="7392" t="2482" r="7858" b="2640">80.05</wd>

</ln>

</para>

</cell>

<cell gridColFrom="5" gridColTill="5" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="8770" t="2482" r="8827" b="2630" alignment="centered" spaceAfter="18" lsp="exactly" lspExact="241" language="en">

<ln l="8770" t="2482" r="8827" b="2630" baseLine="2630" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="8770" t="2578" r="8827" b="2597">-</wd>

</ln>

</para>

</cell>

</table>

<para l="3326" t="2957" r="8616" b="3158" alignment="centered" spaceAfter="517" lsp="exactly" lspExact="271" language="en">

<ln l="3326" t="2957" r="8616" b="3158" baseLine="3110" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="3326" t="2957" r="3806" b="3115">Table</wd>

<space/>

<wd l="3864" t="2962" r="4013" b="3115">2:</wd>

<space/>

<wd l="4152" t="2957" r="4709" b="3115">Result</wd>

<space/>

<wd l="4766" t="2957" r="4958" b="3115">of</wd>

<space/>

<wd l="5002" t="2957" r="5266" b="3115">the</wd>

<space/>

<wd l="5323" t="2957" r="6120" b="3158">proposed</wd>

<space/>

<wd l="6187" t="2981" r="6826" b="3158">system.</wd>

<space/>

<wd l="6902" t="2957" r="7181" b="3110">All</wd>

<space/>

<wd l="7238" t="2957" r="7781" b="3115">values</wd>

<space/>

<wd l="7848" t="3010" r="8107" b="3115">are</wd>

<space/>

<wd l="8165" t="2957" r="8338" b="3110">in</wd>

<space/>

<wd l="8400" t="2962" r="8616" b="3115">%.</wd>

</ln>

</para>

</column>

</section>

<section l="1440" t="3698" r="10531" b="15315">

<column l="1440" t="3698" r="5842" b="15315">

<para l="1440" t="3758" r="5808" b="6125" alignment="justified" lsp="exactly" lspExact="270" language="en">

<ln l="1440" t="3758" r="5794" b="3960" baseLine="3907" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="3811" r="1694" b="3917">we</wd>

<space/>

<wd l="1781" t="3758" r="2453" b="3960">identify</wd>

<space/>

<wd l="2544" t="3758" r="2808" b="3917">the</wd>

<space/>

<wd l="2899" t="3758" r="3806" b="3917">candidates</wd>

<space/>

<wd l="3902" t="3758" r="4162" b="3917">for</wd>

<space/>

<wd l="4243" t="3758" r="5453" b="3917">normalization</wd>

<space/>

<wd l="5539" t="3811" r="5794" b="3917">us-</wd>

</ln>

<ln l="1440" t="4027" r="5803" b="4229" baseLine="4181" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="4027" r="1714" b="4229">ing</wd>

<space/>

<wd l="1781" t="4080" r="1877" b="4186">a</wd>

<space/>

<wd l="1934" t="4032" r="2342" b="4186">CRF</wd>

<space/>

<wd l="2400" t="4027" r="2899" b="4186">based</wd>

<space/>

<wd l="2962" t="4027" r="3797" b="4229">approach,</wd>

<space/>

<wd l="3874" t="4027" r="4186" b="4186">and</wd>

<space/>

<wd l="4238" t="4027" r="4411" b="4181">in</wd>

<space/>

<wd l="4469" t="4027" r="4733" b="4186">the</wd>

<space/>

<wd l="4800" t="4027" r="5400" b="4186">second</wd>

<space/>

<wd l="5462" t="4051" r="5803" b="4229">step</wd>

<space/>

</ln>

<ln l="1440" t="4301" r="5803" b="4502" baseLine="4450" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="4354" r="1694" b="4459">we</wd>

<space/>

<wd l="1762" t="4301" r="2616" b="4502">employed</wd>

<space/>

<wd l="2683" t="4301" r="3278" b="4459">several</wd>

<space/>

<wd l="3346" t="4301" r="4171" b="4459">heuristics</wd>

<space/>

<wd l="4243" t="4301" r="4502" b="4459">for</wd>

<space/>

<wd l="4560" t="4301" r="5477" b="4502">converting</wd>

<space/>

<wd l="5539" t="4301" r="5803" b="4459">the</wd>

<space/>

</ln>

<ln l="1440" t="4570" r="5794" b="4728" baseLine="4718" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="4570" r="2386" b="4728">wordforms</wd>

<space/>

<wd l="2443" t="4570" r="2779" b="4728">into</wd>

<space/>

<wd l="2832" t="4570" r="3096" b="4728">the</wd>

<space/>

<wd l="3144" t="4570" r="4128" b="4728">normalized</wd>

<space/>

<wd l="4171" t="4570" r="4642" b="4728">form.</wd>

<space/>

<wd l="4718" t="4574" r="5002" b="4728">We</wd>

<space/>

<wd l="5050" t="4570" r="5453" b="4728">have</wd>

<space/>

<wd l="5501" t="4570" r="5794" b="4723">im-</wd>

</ln>

<ln l="1440" t="4843" r="5794" b="5045" baseLine="4992" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="4843" r="2352" b="5045">plemented</wd>

<space/>

<wd l="2414" t="4843" r="2678" b="5002">the</wd>

<space/>

<wd l="2741" t="4843" r="3422" b="5002">features</wd>

<space/>

<wd l="3494" t="4843" r="4032" b="5002">which</wd>

<space/>

<wd l="4094" t="4896" r="4358" b="5002">are</wd>

<space/>

<wd l="4421" t="4843" r="5011" b="5045">mostly</wd>

<space/>

<wd l="5083" t="4843" r="5794" b="5002">domain-</wd>

</ln>

<ln l="1440" t="5112" r="5803" b="5314" baseLine="5261" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="5112" r="2510" b="5314">independent</wd>

<space/>

<wd l="2563" t="5112" r="2731" b="5266">in</wd>

<space/>

<wd l="2784" t="5112" r="3053" b="5270">the</wd>

<space/>

<wd l="3115" t="5165" r="3576" b="5270">sense</wd>

<space/>

<wd l="3634" t="5112" r="3960" b="5270">that</wd>

<space/>

<wd l="4013" t="5165" r="4267" b="5270">we</wd>

<space/>

<wd l="4330" t="5112" r="4603" b="5270">did</wd>

<space/>

<wd l="4656" t="5136" r="4939" b="5270">not</wd>

<space/>

<wd l="4992" t="5112" r="5458" b="5270">make</wd>

<space/>

<wd l="5515" t="5165" r="5803" b="5270">use</wd>

<space/>

</ln>

<ln l="1445" t="5381" r="5808" b="5582" baseLine="5534" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="5381" r="1637" b="5539">of</wd>

<space/>

<wd l="1694" t="5434" r="2002" b="5582">any</wd>

<space/>

<wd l="2078" t="5381" r="2731" b="5539">domain</wd>

<space/>

<wd l="2808" t="5381" r="3461" b="5582">specific</wd>

<space/>

<wd l="3533" t="5434" r="4349" b="5539">resources</wd>

<space/>

<wd l="4435" t="5381" r="4992" b="5539">and/or</wd>

<space/>

<wd l="5054" t="5381" r="5472" b="5539">tools</wd>

<space/>

<wd l="5549" t="5381" r="5808" b="5539">for</wd>

<space/>

</ln>

<ln l="1440" t="5654" r="5808" b="5813" baseLine="5803" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="5654" r="1843" b="5813">their</wd>

<space/>

<wd l="1896" t="5654" r="2803" b="5813">extraction.</wd>

<space/>

<wd l="2885" t="5654" r="3542" b="5813">Official</wd>

<space/>

<wd l="3600" t="5654" r="4493" b="5813">evaluation</wd>

<space/>

<wd l="4555" t="5654" r="5078" b="5813">shows</wd>

<space/>

<wd l="5136" t="5654" r="5467" b="5813">that</wd>

<space/>

<wd l="5520" t="5707" r="5808" b="5813">our</wd>

<space/>

</ln>

<ln l="1450" t="5923" r="5146" b="6125" baseLine="6077" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1450" t="5947" r="2050" b="6125">system</wd>

<space/>

<wd l="2107" t="5923" r="2837" b="6082">achieves</wd>

<space/>

<wd l="2899" t="5923" r="3163" b="6082">the</wd>

<space/>

<wd l="3221" t="5928" r="4138" b="6082">F-measure</wd>

<space/>

<wd l="4200" t="5923" r="4392" b="6082">of</wd>

<space/>

<wd l="4440" t="5923" r="5146" b="6082">80.05%.</wd>

</ln>

</para>

<para l="1440" t="6197" r="5803" b="8026" alignment="justified" fli="216" lsp="exactly" lspExact="271" language="en">

<ln l="1656" t="6197" r="5794" b="6398" baseLine="6346" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="6202" r="1843" b="6350">In</wd>

<space/>

<wd l="1901" t="6197" r="2419" b="6355">future</wd>

<space/>

<wd l="2477" t="6250" r="2731" b="6355">we</wd>

<space/>

<wd l="2794" t="6197" r="3336" b="6355">would</wd>

<space/>

<wd l="3394" t="6197" r="3720" b="6355">like</wd>

<space/>

<wd l="3778" t="6221" r="3946" b="6355">to</wd>

<space/>

<wd l="4013" t="6250" r="4450" b="6398">carry</wd>

<space/>

<wd l="4517" t="6221" r="4795" b="6355">out</wd>

<space/>

<wd l="4848" t="6250" r="5299" b="6355">more</wd>

<space/>

<wd l="5362" t="6250" r="5794" b="6355">com-</wd>

</ln>

<ln l="1440" t="6466" r="5803" b="6667" baseLine="6619" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1440" t="6466" r="2376" b="6667">prehensive</wd>

<space/>

<wd l="2438" t="6466" r="3130" b="6667">analysis</wd>

<space/>

<wd l="3202" t="6518" r="3418" b="6624">on</wd>

<space/>

<wd l="3475" t="6466" r="3744" b="6624">the</wd>

<space/>

<wd l="3806" t="6466" r="4699" b="6624">evaluation</wd>

<space/>

<wd l="4762" t="6466" r="5371" b="6624">results.</wd>

<space/>

<wd l="5467" t="6466" r="5803" b="6624">The</wd>

<space/>

</ln>

<ln l="1440" t="6739" r="5794" b="6941" baseLine="6888" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1440" t="6739" r="2122" b="6898">features</wd>

<space/>

<wd l="2194" t="6739" r="2506" b="6898">and</wd>

<space/>

<wd l="2568" t="6739" r="2981" b="6898">rules</wd>

<space/>

<wd l="3048" t="6739" r="3379" b="6898">that</wd>

<space/>

<wd l="3437" t="6792" r="3691" b="6898">we</wd>

<space/>

<wd l="3754" t="6739" r="4152" b="6898">used</wd>

<space/>

<wd l="4210" t="6739" r="4584" b="6898">here</wd>

<space/>

<wd l="4651" t="6792" r="4910" b="6898">are</wd>

<space/>

<wd l="4973" t="6792" r="5352" b="6941">very</wd>

<space/>

<wd l="5424" t="6792" r="5794" b="6941">gen-</wd>

</ln>

<ln l="1445" t="7008" r="5803" b="7210" baseLine="7162" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="7008" r="1766" b="7166">eral</wd>

<space/>

<wd l="1858" t="7008" r="2165" b="7166">and</wd>

<space/>

<wd l="2261" t="7008" r="3595" b="7210">straightforward</wd>

<space/>

<wd l="3677" t="7008" r="3845" b="7162">in</wd>

<space/>

<wd l="3926" t="7032" r="4517" b="7166">nature.</wd>

<space/>

<wd l="4680" t="7013" r="4862" b="7162">In</wd>

<space/>

<wd l="4949" t="7008" r="5467" b="7166">future</wd>

<space/>

<wd l="5549" t="7061" r="5803" b="7166">we</wd>

<space/>

</ln>

<ln l="1440" t="7282" r="5794" b="7483" baseLine="7430" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1440" t="7282" r="1982" b="7440">would</wd>

<space/>

<wd l="2054" t="7282" r="2376" b="7440">like</wd>

<space/>

<wd l="2448" t="7306" r="2616" b="7440">to</wd>

<space/>

<wd l="2688" t="7282" r="3317" b="7483">modify</wd>

<space/>

<wd l="3394" t="7282" r="3658" b="7440">the</wd>

<space/>

<wd l="3739" t="7306" r="4339" b="7483">system</wd>

<space/>

<wd l="4406" t="7282" r="4742" b="7440">into</wd>

<space/>

<wd l="4819" t="7334" r="4915" b="7440">a</wd>

<space/>

<wd l="4982" t="7282" r="5390" b="7483">fully</wd>

<space/>

<wd l="5467" t="7334" r="5794" b="7440">ma-</wd>

</ln>

<ln l="1445" t="7550" r="5794" b="7752" baseLine="7704" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="7550" r="1910" b="7709">chine</wd>

<space/>

<wd l="1987" t="7550" r="2698" b="7752">learning</wd>

<space/>

<wd l="2774" t="7550" r="3274" b="7709">based</wd>

<space/>

<wd l="3350" t="7550" r="4147" b="7752">approach</wd>

<space/>

<wd l="4224" t="7550" r="4536" b="7709">and</wd>

<space/>

<wd l="4608" t="7574" r="4886" b="7752">put</wd>

<space/>

<wd l="4963" t="7574" r="5395" b="7709">extra</wd>

<space/>

<wd l="5472" t="7603" r="5794" b="7709">em-</wd>

</ln>

<ln l="1440" t="7824" r="2866" b="8026" baseLine="7973" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1440" t="7824" r="1978" b="8026">phasis</wd>

<space/>

<wd l="2045" t="7877" r="2261" b="7982">on</wd>

<space/>

<wd l="2318" t="7877" r="2866" b="7982">errors.</wd>

</ln>

</para>

<para l="1445" t="8539" r="2544" b="8712" alignment="left" spaceBefore="459" lsp="exactly" lspExact="273" language="en">

<ln l="1445" t="8539" r="2544" b="8712" baseLine="8702" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="8539" r="2544" b="8712">References</wd>

</ln>

</para>

<para l="1445" t="8918" r="5808" b="10205" alignment="justified" li="216" spaceBefore="127" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="1445" t="8918" r="5808" b="9106" baseLine="9058" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="8918" r="2174" b="9106">Grzegorz</wd>

<space/>

<wd l="2256" t="8918" r="3024" b="9101">Chrupala.</wd>

<space/>

<wd l="3192" t="8918" r="3624" b="9062">2014.</wd>

<space/>

<wd l="3787" t="8918" r="4790" b="9106">Normalizing</wd>

<space/>

<wd l="4867" t="8938" r="5371" b="9062">tweets</wd>

<space/>

<wd l="5453" t="8918" r="5808" b="9062">with</wd>

<space/>

</ln>

<ln l="1661" t="9139" r="5808" b="9326" baseLine="9278" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1661" t="9139" r="1958" b="9283">edit</wd>

<space/>

<wd l="2054" t="9139" r="2563" b="9322">scripts</wd>

<space/>

<wd l="2659" t="9139" r="2942" b="9283">and</wd>

<space/>

<wd l="3034" t="9158" r="3754" b="9283">recurrent</wd>

<space/>

<wd l="3840" t="9139" r="4334" b="9283">neural</wd>

<space/>

<wd l="4435" t="9139" r="5429" b="9326">embeddings.</wd>

<space/>

<wd l="5640" t="9144" r="5808" b="9278">In</wd>

<space/>

</ln>

<ln l="1656" t="9360" r="5798" b="9542" baseLine="9494" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="9360" r="2645" b="9542">Proceedings</wd>

<space/>

<wd l="2707" t="9360" r="2890" b="9542">of</wd>

<space/>

<wd l="2923" t="9360" r="3158" b="9504">the</wd>

<space/>

<wd l="3221" t="9360" r="3624" b="9504">52nd</wd>

<space/>

<wd l="3662" t="9360" r="4258" b="9504">Annual</wd>

<space/>

<wd l="4306" t="9365" r="4963" b="9542">Meeting</wd>

<space/>

<wd l="5030" t="9360" r="5208" b="9542">of</wd>

<space/>

<wd l="5242" t="9360" r="5477" b="9504">the</wd>

<space/>

<wd l="5525" t="9365" r="5798" b="9504">As-</wd>

</ln>

<ln l="1656" t="9576" r="5784" b="9758" baseLine="9715" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="9581" r="2386" b="9720">sociation</wd>

<space/>

<wd l="2405" t="9576" r="2683" b="9758">for</wd>

<space/>

<wd l="2731" t="9576" r="3922" b="9758">Computational</wd>

<space/>

<wd l="3965" t="9581" r="4872" b="9758">Linguistics,</wd>

<space/>

<wd l="4934" t="9581" r="5309" b="9720">ACL</wd>

<space/>

<wd l="5357" t="9576" r="5784" b="9744">2014,</wd>

<space/>

</ln>

<ln l="1651" t="9797" r="5798" b="9965" baseLine="9931" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="9802" r="2026" b="9941">June</wd>

<space/>

<wd l="2088" t="9797" r="2582" b="9965">22-27,</wd>

<space/>

<wd l="2669" t="9797" r="3101" b="9965">2014,</wd>

<space/>

<wd l="3182" t="9797" r="4003" b="9965">Baltimore,</wd>

<space/>

<wd l="4085" t="9802" r="4426" b="9965">MD,</wd>

<space/>

<wd l="4531" t="9802" r="4906" b="9965">USA,</wd>

<space/>

<wd l="5006" t="9797" r="5573" b="9941">Volume</wd>

<space/>

<wd l="5640" t="9797" r="5798" b="9941">2:</wd>

<space/>

</ln>

<ln l="1656" t="10018" r="4027" b="10205" baseLine="10152">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1656" t="10018" r="2098" b="10162">Short</wd>

<space/>

</run>

<wd l="2141" t="10022" r="2731" b="10200"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Papers</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="2784" t="10061" r="3235" b="10205">pages</wd>

<space/>

<wd l="3293" t="10018" r="4027" b="10162">680–686.</wd>

</run>

</ln>

</para>

<para l="1440" t="10382" r="5798" b="11914" alignment="justified" li="216" spaceBefore="179" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="1440" t="10382" r="5798" b="10603" baseLine="10549">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1440" t="10416" r="2083" b="10560">Jennifer</wd>

<space/>

<wd l="2179" t="10421" r="2707" b="10584">Foster,</wd>

<space/>

</run>

<wd l="2827" t="10382" r="3355" b="10560"><run underlined="none" subsuperscript="superscript" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">¨</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Ozlem</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="3451" t="10416" r="4277" b="10603"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">C</run>

<run underlined="none" subsuperscript="subscript" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">¸</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">etinoglu,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="4387" t="10416" r="5059" b="10560">Joachim</wd>

<space/>

<wd l="5150" t="10421" r="5798" b="10603">Wagner,</wd>

<space/>

</run>

</ln>

<ln l="1656" t="10637" r="5798" b="10824" baseLine="10771" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="10637" r="2203" b="10819">Joseph</wd>

<space/>

<wd l="2299" t="10642" r="2506" b="10781">Le</wd>

<space/>

<wd l="2606" t="10642" r="3082" b="10805">Roux,</wd>

<space/>

<wd l="3206" t="10637" r="3845" b="10819">Stephen</wd>

<space/>

<wd l="3941" t="10642" r="4517" b="10824">Hogan,</wd>

<space/>

<wd l="4632" t="10637" r="5213" b="10781">Joakim</wd>

<space/>

<wd l="5309" t="10637" r="5798" b="10805">Nivre,</wd>

<space/>

</ln>

<ln l="1656" t="10853" r="5794" b="11040" baseLine="10992" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="10853" r="2266" b="10997">Deirdre</wd>

<space/>

<wd l="2366" t="10858" r="2942" b="11040">Hogan,</wd>

<space/>

<wd l="3067" t="10853" r="3350" b="10997">and</wd>

<space/>

<wd l="3451" t="10853" r="3869" b="10997">Josef</wd>

<space/>

<wd l="3960" t="10896" r="4238" b="10997">van</wd>

<space/>

<wd l="4349" t="10853" r="5112" b="10997">Genabith.</wd>

<space/>

<wd l="5362" t="10853" r="5794" b="10997">2011.</wd>

<space/>

</ln>

<ln l="1656" t="11074" r="5798" b="11261" baseLine="11208" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="11074" r="2726" b="11256">#hardtoparse:</wd>

<space/>

<wd l="2832" t="11074" r="3187" b="11218">POS</wd>

<space/>

<wd l="3259" t="11074" r="3850" b="11261">tagging</wd>

<space/>

<wd l="3922" t="11074" r="4205" b="11218">and</wd>

<space/>

<wd l="4267" t="11074" r="4853" b="11261">parsing</wd>

<space/>

<wd l="4915" t="11074" r="5160" b="11218">the</wd>

<space/>

<wd l="5222" t="11074" r="5798" b="11218">twitter-</wd>

</ln>

<ln l="1656" t="11294" r="5789" b="11477" baseLine="11429">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1656" t="11338" r="2112" b="11438">verse.</wd>

<space/>

<wd l="2198" t="11299" r="2366" b="11434">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2395" t="11294" r="3202" b="11477">Analyzing</wd>

<space/>

<wd l="3250" t="11299" r="4042" b="11462">Microtext,</wd>

<space/>

<wd l="4114" t="11299" r="4656" b="11477">Papers</wd>

<space/>

<wd l="4670" t="11294" r="5074" b="11477">from</wd>

<space/>

<wd l="5122" t="11294" r="5357" b="11438">the</wd>

<space/>

<wd l="5405" t="11294" r="5789" b="11438">2011</wd>

<space/>

</run>

</ln>

<ln l="1642" t="11510" r="5784" b="11693" baseLine="11650" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="11515" r="2102" b="11650">AAAI</wd>

<space/>

<wd l="2184" t="11510" r="2990" b="11693">Workshop,</wd>

<space/>

<wd l="3101" t="11515" r="3398" b="11654">San</wd>

<space/>

<wd l="3485" t="11515" r="4306" b="11678">Francisco,</wd>

<space/>

<wd l="4430" t="11510" r="5280" b="11693">California,</wd>

<space/>

<wd l="5410" t="11515" r="5784" b="11678">USA,</wd>

<space/>

</ln>

<ln l="1642" t="11731" r="2894" b="11914" baseLine="11866">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1642" t="11736" r="2213" b="11914">August</wd>

<space/>

<wd l="2261" t="11731" r="2386" b="11899">8,</wd>

<space/>

</run>

<wd l="2458" t="11731" r="2894" b="11875"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">2011</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

<para l="1440" t="12130" r="5837" b="13632" alignment="justified" li="216" spaceBefore="182" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="1440" t="12130" r="5798" b="12317" baseLine="12269" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1440" t="12134" r="1670" b="12274">Bo</wd>

<space/>

<wd l="1728" t="12134" r="2054" b="12274">Han</wd>

<space/>

<wd l="2117" t="12130" r="2400" b="12274">and</wd>

<space/>

<wd l="2453" t="12130" r="3130" b="12317">Timothy</wd>

<space/>

<wd l="3187" t="12130" r="3898" b="12274">Baldwin.</wd>

<space/>

<wd l="3998" t="12130" r="4430" b="12274">2011.</wd>

<space/>

<wd l="4526" t="12130" r="5117" b="12274">Lexical</wd>

<space/>

<wd l="5174" t="12130" r="5798" b="12274">normal-</wd>

</ln>

<ln l="1656" t="12350" r="5798" b="12538" baseLine="12485" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="12350" r="2189" b="12494">isation</wd>

<space/>

<wd l="2261" t="12350" r="2434" b="12494">of</wd>

<space/>

<wd l="2501" t="12350" r="2894" b="12494">short</wd>

<space/>

<wd l="2957" t="12370" r="3254" b="12494">text</wd>

<space/>

<wd l="3322" t="12394" r="4114" b="12538">messages:</wd>

<space/>

<wd l="4229" t="12350" r="4694" b="12494">Makn</wd>

<space/>

<wd l="4771" t="12394" r="5098" b="12494">sens</wd>

<space/>

<wd l="5179" t="12394" r="5266" b="12494">a</wd>

<space/>

<wd l="5328" t="12350" r="5798" b="12494">#twit-</wd>

</ln>

<ln l="1656" t="12566" r="5837" b="12749" baseLine="12706">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1656" t="12586" r="1896" b="12710">ter.</wd>

<space/>

<wd l="2040" t="12571" r="2203" b="12706">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2270" t="12566" r="3259" b="12749">Proceedings</wd>

<space/>

<wd l="3331" t="12566" r="3514" b="12749">of</wd>

<space/>

<wd l="3557" t="12566" r="3792" b="12710">the</wd>

<space/>

<wd l="3864" t="12566" r="4214" b="12710">49th</wd>

<space/>

<wd l="4272" t="12566" r="4862" b="12710">Annual</wd>

<space/>

<wd l="4925" t="12571" r="5578" b="12749">Meeting</wd>

<space/>

<wd l="5654" t="12566" r="5837" b="12749">of</wd>

<space/>

</run>

</ln>

<ln l="1661" t="12787" r="5798" b="12970" baseLine="12922" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1661" t="12787" r="1901" b="12931">the</wd>

<space/>

<wd l="1944" t="12792" r="2885" b="12931">Association</wd>

<space/>

<wd l="2909" t="12787" r="3182" b="12970">for</wd>

<space/>

<wd l="3240" t="12787" r="4430" b="12970">Computational</wd>

<space/>

<wd l="4478" t="12792" r="5414" b="12970">Linguistics:</wd>

<space/>

<wd l="5491" t="12792" r="5798" b="12931">Hu-</wd>

</ln>

<ln l="1656" t="12989" r="5798" b="13190" baseLine="13142">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1656" t="13056" r="1997" b="13152">man</wd>

<space/>

<wd l="2064" t="13013" r="2861" b="13190">Language</wd>

<space/>

<wd l="2942" t="13008" r="3970" b="13190">Technologies</wd>

<space/>

<wd l="4051" t="13094" r="4099" b="13109">-</wd>

<space/>

<wd l="4190" t="13008" r="4757" b="13152">Volume</wd>

<space/>

</run>

<wd l="4838" t="13008" r="4973" b="13176"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">1</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="5054" t="13013" r="5419" b="13147">HLT</wd>

<space/>

<wd l="5506" t="13008" r="5798" b="13176">’11,</wd>

<space/>

</run>

</ln>

<ln l="1656" t="13224" r="5808" b="13411" baseLine="13363" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="13267" r="2107" b="13411">pages</wd>

<space/>

<wd l="2170" t="13224" r="2904" b="13392">368–378,</wd>

<space/>

<wd l="2966" t="13224" r="3970" b="13411">Stroudsburg,</wd>

<space/>

<wd l="4027" t="13229" r="4310" b="13392">PA,</wd>

<space/>

<wd l="4363" t="13224" r="4800" b="13368">USA.</wd>

<space/>

<wd l="4862" t="13224" r="5808" b="13368">Association</wd>

<space/>

</ln>

<ln l="1656" t="13445" r="4099" b="13632" baseLine="13579" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="13445" r="1891" b="13589">for</wd>

<space/>

<wd l="1944" t="13445" r="3120" b="13627">Computational</wd>

<space/>

<wd l="3173" t="13445" r="4099" b="13632">Linguistics.</wd>

</ln>

</para>

<para l="1440" t="13843" r="5803" b="14688" alignment="justified" li="216" spaceBefore="183" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="1440" t="13843" r="5803" b="14030" baseLine="13982" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1440" t="13843" r="1819" b="13987">John</wd>

<space/>

<wd l="1901" t="13848" r="2083" b="13987">D.</wd>

<space/>

<wd l="2179" t="13843" r="2861" b="14030">Lafferty,</wd>

<space/>

<wd l="2962" t="13843" r="3590" b="13987">Andrew</wd>

<space/>

<wd l="3682" t="13843" r="4579" b="14011">McCallum,</wd>

<space/>

<wd l="4685" t="13843" r="4968" b="13987">and</wd>

<space/>

<wd l="5054" t="13843" r="5803" b="13987">Fernando</wd>

<space/>

</ln>

<ln l="1661" t="14064" r="5789" b="14208" baseLine="14198" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1661" t="14064" r="1829" b="14208">C.</wd>

<space/>

<wd l="1910" t="14069" r="2093" b="14208">N.</wd>

<space/>

<wd l="2174" t="14064" r="2774" b="14208">Pereira.</wd>

<space/>

<wd l="2928" t="14064" r="3360" b="14208">2001.</wd>

<space/>

<wd l="3514" t="14064" r="4445" b="14208">Conditional</wd>

<space/>

<wd l="4517" t="14064" r="5198" b="14208">Random</wd>

<space/>

<wd l="5261" t="14064" r="5789" b="14208">Fields:</wd>

<space/>

</ln>

<ln l="1656" t="14285" r="5803" b="14472" baseLine="14419" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="14285" r="2664" b="14429">Probabilistic</wd>

<space/>

<wd l="2741" t="14285" r="3331" b="14429">Models</wd>

<space/>

<wd l="3408" t="14285" r="3643" b="14429">for</wd>

<space/>

<wd l="3720" t="14285" r="4661" b="14472">Segmenting</wd>

<space/>

<wd l="4742" t="14285" r="5026" b="14429">and</wd>

<space/>

<wd l="5098" t="14285" r="5803" b="14472">Labeling</wd>

<space/>

</ln>

<ln l="1661" t="14501" r="5002" b="14688" baseLine="14640">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1661" t="14501" r="2419" b="14683">Sequence</wd>

<space/>

<wd l="2472" t="14506" r="2885" b="14645">Data.</wd>

<space/>

<wd l="2971" t="14506" r="3134" b="14640">In</wd>

<space/>

</run>

<wd l="3182" t="14506" r="3706" b="14669"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">ICML</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="3758" t="14544" r="4210" b="14688">pages</wd>

<space/>

<wd l="4267" t="14501" r="5002" b="14645">282–289.</wd>

</run>

</ln>

</para>

<para l="1440" t="14899" r="5803" b="15307" alignment="justified" li="216" spaceBefore="177" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="1440" t="14899" r="5803" b="15086" baseLine="15038" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1440" t="14899" r="2117" b="15043">Xiaohua</wd>

<space/>

<wd l="2198" t="14899" r="2515" b="15067">Liu,</wd>

<space/>

<wd l="2621" t="14899" r="3350" b="15043">Shaodian</wd>

<space/>

<wd l="3437" t="14899" r="3989" b="15086">Zhang,</wd>

<space/>

<wd l="4090" t="14904" r="4459" b="15043">Furu</wd>

<space/>

<wd l="4546" t="14899" r="4906" b="15067">Wei,</wd>

<space/>

<wd l="5006" t="14899" r="5294" b="15043">and</wd>

<space/>

<wd l="5376" t="14899" r="5803" b="15086">Ming</wd>

<space/>

</ln>

<ln l="1656" t="15120" r="5794" b="15307" baseLine="15259" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="15120" r="2117" b="15264">Zhou.</wd>

<space/>

<wd l="2222" t="15120" r="2654" b="15264">2011.</wd>

<space/>

<wd l="2755" t="15120" r="3758" b="15307">Recognizing</wd>

<space/>

<wd l="3821" t="15120" r="4349" b="15264">named</wd>

<space/>

<wd l="4411" t="15120" r="4978" b="15264">entities</wd>

<space/>

<wd l="5035" t="15120" r="5194" b="15259">in</wd>

<space/>

<wd l="5246" t="15139" r="5794" b="15264">tweets.</wd>

</ln>

</para>

</column>

<column l="6129" t="3698" r="10531" b="10203">

<para l="6350" t="3773" r="10512" b="4834" alignment="justified" li="216" spaceBefore="44" lsp="exactly" lspExact="219" language="en">

<ln l="6365" t="3773" r="10507" b="3955" baseLine="3907">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6365" t="3778" r="6533" b="3912">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6605" t="3773" r="7594" b="3955">Proceedings</wd>

<space/>

<wd l="7675" t="3773" r="7858" b="3955">of</wd>

<space/>

<wd l="7906" t="3773" r="8146" b="3917">the</wd>

<space/>

<wd l="8222" t="3773" r="8573" b="3917">49th</wd>

<space/>

<wd l="8640" t="3773" r="9230" b="3917">Annual</wd>

<space/>

<wd l="9302" t="3778" r="9955" b="3955">Meeting</wd>

<space/>

<wd l="10042" t="3773" r="10224" b="3955">of</wd>

<space/>

<wd l="10272" t="3773" r="10507" b="3917">the</wd>

<space/>

</run>

</ln>

<ln l="6350" t="3989" r="10507" b="4171" baseLine="4128" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="3994" r="7291" b="4133">Association</wd>

<space/>

<wd l="7320" t="3989" r="7594" b="4171">for</wd>

<space/>

<wd l="7656" t="3989" r="8842" b="4171">Computational</wd>

<space/>

<wd l="8894" t="3994" r="9835" b="4171">Linguistics:</wd>

<space/>

<wd l="9922" t="3994" r="10507" b="4133">Human</wd>

<space/>

</ln>

<ln l="6360" t="4205" r="10507" b="4397" baseLine="4344">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6360" t="4214" r="7157" b="4392">Language</wd>

<space/>

<wd l="7219" t="4210" r="8246" b="4392">Technologies</wd>

<space/>

<wd l="8314" t="4296" r="8362" b="4310">-</wd>

<space/>

<wd l="8438" t="4210" r="9005" b="4354">Volume</wd>

<space/>

</run>

<wd l="9072" t="4210" r="9206" b="4378"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">1</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="9264" t="4214" r="9634" b="4349">HLT</wd>

<space/>

<wd l="9701" t="4210" r="9998" b="4378">’11,</wd>

<space/>

<wd l="10056" t="4253" r="10507" b="4397">pages</wd>

<space/>

</run>

</ln>

<ln l="6370" t="4430" r="10512" b="4618" baseLine="4565" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6370" t="4430" r="7104" b="4598">359–367,</wd>

<space/>

<wd l="7219" t="4430" r="8222" b="4618">Stroudsburg,</wd>

<space/>

<wd l="8328" t="4435" r="8611" b="4598">PA,</wd>

<space/>

<wd l="8707" t="4430" r="9144" b="4574">USA.</wd>

<space/>

<wd l="9245" t="4430" r="10190" b="4574">Association</wd>

<space/>

<wd l="10277" t="4430" r="10512" b="4574">for</wd>

<space/>

</ln>

<ln l="6370" t="4646" r="8520" b="4834" baseLine="4786" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6370" t="4646" r="7546" b="4829">Computational</wd>

<space/>

<wd l="7598" t="4646" r="8520" b="4834">Linguistics.</wd>

</ln>

</para>

<para l="6144" t="5045" r="10512" b="6547" alignment="justified" li="216" spaceBefore="180" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="6144" t="5045" r="10512" b="5232" baseLine="5184" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="5045" r="6398" b="5189">Fei</wd>

<space/>

<wd l="6490" t="5045" r="6806" b="5213">Liu,</wd>

<space/>

<wd l="6912" t="5045" r="7517" b="5232">Fuliang</wd>

<space/>

<wd l="7608" t="5050" r="8112" b="5232">Weng,</wd>

<space/>

<wd l="8218" t="5045" r="8501" b="5189">and</wd>

<space/>

<wd l="8587" t="5045" r="8971" b="5189">Xiao</wd>

<space/>

<wd l="9062" t="5045" r="9523" b="5232">Jiang.</wd>

<space/>

<wd l="9730" t="5045" r="10162" b="5189">2012.</wd>

<space/>

<wd l="10368" t="5050" r="10512" b="5184">A</wd>

<space/>

</ln>

<ln l="6365" t="5266" r="10502" b="5453" baseLine="5400" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6365" t="5266" r="7594" b="5453">broad-coverage</wd>

<space/>

<wd l="7646" t="5266" r="8755" b="5410">normalization</wd>

<space/>

<wd l="8813" t="5285" r="9360" b="5453">system</wd>

<space/>

<wd l="9403" t="5266" r="9638" b="5410">for</wd>

<space/>

<wd l="9696" t="5266" r="10147" b="5410">social</wd>

<space/>

<wd l="10200" t="5309" r="10502" b="5410">me-</wd>

</ln>

<ln l="6370" t="5486" r="10512" b="5674" baseLine="5621">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6370" t="5486" r="6610" b="5630">dia</wd>

<space/>

<wd l="6686" t="5486" r="7445" b="5674">language.</wd>

<space/>

<wd l="7627" t="5491" r="7795" b="5626">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7872" t="5486" r="8861" b="5669">Proceedings</wd>

<space/>

<wd l="8947" t="5486" r="9125" b="5669">of</wd>

<space/>

<wd l="9182" t="5486" r="9418" b="5630">the</wd>

<space/>

<wd l="9499" t="5486" r="9850" b="5630">50th</wd>

<space/>

<wd l="9922" t="5486" r="10512" b="5630">Annual</wd>

<space/>

</run>

</ln>

<ln l="6360" t="5702" r="10502" b="5885" baseLine="5842" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6360" t="5707" r="7013" b="5885">Meeting</wd>

<space/>

<wd l="7090" t="5702" r="7272" b="5885">of</wd>

<space/>

<wd l="7310" t="5702" r="7550" b="5846">the</wd>

<space/>

<wd l="7603" t="5707" r="8544" b="5846">Association</wd>

<space/>

<wd l="8582" t="5702" r="8856" b="5885">for</wd>

<space/>

<wd l="8923" t="5702" r="10114" b="5885">Computational</wd>

<space/>

<wd l="10176" t="5707" r="10502" b="5846">Lin-</wd>

</ln>

<ln l="6365" t="5918" r="10507" b="6110" baseLine="6058">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6365" t="5928" r="7032" b="6106">guistics:</wd>

<space/>

<wd l="7128" t="5928" r="7536" b="6106">Long</wd>

<space/>

<wd l="7603" t="5928" r="8150" b="6106">Papers</wd>

<space/>

<wd l="8227" t="6010" r="8275" b="6024">-</wd>

<space/>

<wd l="8362" t="5923" r="8933" b="6067">Volume</wd>

<space/>

</run>

<wd l="9010" t="5923" r="9144" b="6091"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">1</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="9216" t="5923" r="9605" b="6067">ACL</wd>

<space/>

<wd l="9686" t="5923" r="9984" b="6091">’12,</wd>

<space/>

<wd l="10056" t="5966" r="10507" b="6110">pages</wd>

<space/>

</run>

</ln>

<ln l="6384" t="6144" r="10512" b="6331" baseLine="6278" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6384" t="6144" r="7306" b="6312">1035–1044,</wd>

<space/>

<wd l="7373" t="6144" r="8376" b="6331">Stroudsburg,</wd>

<space/>

<wd l="8438" t="6149" r="8717" b="6312">PA,</wd>

<space/>

<wd l="8779" t="6144" r="9216" b="6288">USA.</wd>

<space/>

<wd l="9283" t="6144" r="10224" b="6288">Association</wd>

<space/>

<wd l="10277" t="6144" r="10512" b="6288">for</wd>

<space/>

</ln>

<ln l="6370" t="6360" r="8520" b="6547" baseLine="6499" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6370" t="6360" r="7546" b="6542">Computational</wd>

<space/>

<wd l="7598" t="6360" r="8520" b="6547">Linguistics.</wd>

</ln>

</para>

<para l="6144" t="6758" r="10502" b="8261" alignment="justified" li="216" spaceBefore="181" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="6144" t="6758" r="10502" b="6902" baseLine="6898" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="6758" r="6542" b="6902">Jordi</wd>

<space/>

<wd l="6614" t="6763" r="7042" b="6902">Porta</wd>

<space/>

<wd l="7114" t="6758" r="7397" b="6902">and</wd>

<space/>

<wd l="7469" t="6758" r="8227" b="6902">José-Luis</wd>

<space/>

<wd l="8309" t="6758" r="8928" b="6902">Sancho.</wd>

<space/>

<wd l="9086" t="6758" r="9518" b="6902">2013.</wd>

<space/>

<wd l="9672" t="6758" r="10114" b="6902">Word</wd>

<space/>

<wd l="10181" t="6802" r="10502" b="6902">nor-</wd>

</ln>

<ln l="6365" t="6979" r="10498" b="7166" baseLine="7114" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6365" t="6979" r="7205" b="7123">malization</wd>

<space/>

<wd l="7286" t="6979" r="7440" b="7118">in</wd>

<space/>

<wd l="7522" t="6979" r="8040" b="7123">twitter</wd>

<space/>

<wd l="8122" t="6979" r="8549" b="7166">using</wd>

<space/>

<wd l="8635" t="6979" r="9470" b="7123">finite-state</wd>

<space/>

<wd l="9552" t="6979" r="10498" b="7123">transducers.</wd>

<space/>

</ln>

<ln l="6365" t="7200" r="10502" b="7382" baseLine="7334">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6365" t="7205" r="6533" b="7339">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6614" t="7200" r="7598" b="7382">Proceedings</wd>

<space/>

<wd l="7690" t="7200" r="7872" b="7382">of</wd>

<space/>

<wd l="7930" t="7200" r="8165" b="7344">the</wd>

<space/>

<wd l="8261" t="7205" r="8717" b="7344">Tweet</wd>

<space/>

<wd l="8789" t="7200" r="9946" b="7358">Normalization</wd>

<space/>

<wd l="10042" t="7200" r="10502" b="7344">Work-</wd>

</run>

</ln>

<ln l="6365" t="7416" r="10502" b="7598" baseLine="7555" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6365" t="7416" r="6734" b="7598">shop</wd>

<space/>

<wd l="6811" t="7416" r="7651" b="7560">co-located</wd>

<space/>

<wd l="7714" t="7416" r="8050" b="7560">with</wd>

<space/>

<wd l="8122" t="7416" r="8472" b="7560">29th</wd>

<space/>

<wd l="8549" t="7416" r="9451" b="7598">Conference</wd>

<space/>

<wd l="9523" t="7416" r="9701" b="7598">of</wd>

<space/>

<wd l="9744" t="7416" r="9979" b="7560">the</wd>

<space/>

<wd l="10046" t="7421" r="10502" b="7598">Span-</wd>

</ln>

<ln l="6374" t="7637" r="10502" b="7819" baseLine="7771" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6374" t="7637" r="6590" b="7781">ish</wd>

<space/>

<wd l="6672" t="7642" r="7248" b="7819">Society</wd>

<space/>

<wd l="7291" t="7637" r="7570" b="7819">for</wd>

<space/>

<wd l="7632" t="7637" r="8256" b="7781">Natural</wd>

<space/>

<wd l="8328" t="7642" r="9125" b="7819">Language</wd>

<space/>

<wd l="9202" t="7642" r="10075" b="7819">Processing</wd>

<space/>

<wd l="10162" t="7642" r="10502" b="7814">(SE-</wd>

</ln>

<ln l="6365" t="7858" r="10502" b="8040" baseLine="7992">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6365" t="7862" r="6744" b="8002">PLN</wd>

<space/>

<wd l="6787" t="7858" r="7282" b="8035">2013),</wd>

<space/>

<wd l="7363" t="7858" r="7992" b="8026">Madrid,</wd>

<space/>

<wd l="8078" t="7862" r="8558" b="8040">Spain,</wd>

<space/>

<wd l="8645" t="7858" r="9494" b="8040">September</wd>

<space/>

<wd l="9547" t="7858" r="9931" b="8026">20th,</wd>

<space/>

</run>

<wd l="10013" t="7858" r="10502" b="8026"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">2013.</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

</ln>

<ln l="6365" t="8074" r="7402" b="8261" baseLine="8213" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6365" t="8117" r="6811" b="8261">pages</wd>

<space/>

<wd l="6869" t="8074" r="7402" b="8218">49–53.</wd>

</ln>

</para>

<para l="6144" t="8472" r="10512" b="10195" alignment="justified" li="216" spaceBefore="179" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="6144" t="8472" r="10512" b="8659" baseLine="8611" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6144" t="8472" r="6706" b="8659">Pidong</wd>

<space/>

<wd l="6749" t="8477" r="7210" b="8659">Wang</wd>

<space/>

<wd l="7258" t="8472" r="7541" b="8616">and</wd>

<space/>

<wd l="7579" t="8477" r="8045" b="8616">Hwee</wd>

<space/>

<wd l="8083" t="8477" r="8386" b="8616">Tou</wd>

<space/>

<wd l="8434" t="8477" r="8712" b="8659">Ng.</wd>

<space/>

<wd l="8803" t="8472" r="9235" b="8616">2013.</wd>

<space/>

<wd l="9322" t="8477" r="9466" b="8611">A</wd>

<space/>

<wd l="9504" t="8472" r="10512" b="8616">beam-search</wd>

<space/>

</ln>

<ln l="6370" t="8693" r="10512" b="8837" baseLine="8827" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6370" t="8693" r="6994" b="8837">decoder</wd>

<space/>

<wd l="7051" t="8693" r="7286" b="8837">for</wd>

<space/>

<wd l="7344" t="8693" r="8453" b="8837">normalization</wd>

<space/>

<wd l="8515" t="8693" r="8683" b="8837">of</wd>

<space/>

<wd l="8741" t="8693" r="9197" b="8837">social</wd>

<space/>

<wd l="9259" t="8693" r="9749" b="8837">media</wd>

<space/>

<wd l="9802" t="8712" r="10099" b="8837">text</wd>

<space/>

<wd l="10157" t="8693" r="10512" b="8837">with</wd>

<space/>

</ln>

<ln l="6370" t="8914" r="10502" b="9096" baseLine="9048">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6370" t="8914" r="7248" b="9096">application</wd>

<space/>

<wd l="7310" t="8933" r="7459" b="9058">to</wd>

<space/>

<wd l="7526" t="8914" r="8198" b="9058">machine</wd>

<space/>

<wd l="8261" t="8914" r="9144" b="9058">translation.</wd>

<space/>

<wd l="9259" t="8918" r="9427" b="9053">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="9480" t="8918" r="10070" b="9058">Human</wd>

<space/>

<wd l="10128" t="8918" r="10502" b="9058">Lan-</wd>

</run>

</ln>

<ln l="6365" t="9130" r="10502" b="9312" baseLine="9269" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6365" t="9178" r="6845" b="9312">guage</wd>

<space/>

<wd l="6902" t="9130" r="7992" b="9312">Technologies:</wd>

<space/>

<wd l="8069" t="9130" r="8966" b="9312">Conference</wd>

<space/>

<wd l="9019" t="9130" r="9202" b="9312">of</wd>

<space/>

<wd l="9221" t="9130" r="9456" b="9274">the</wd>

<space/>

<wd l="9499" t="9130" r="9970" b="9274">North</wd>

<space/>

<wd l="10003" t="9134" r="10502" b="9274">Amer-</wd>

</ln>

<ln l="6374" t="9350" r="10512" b="9533" baseLine="9485" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6374" t="9355" r="6706" b="9494">ican</wd>

<space/>

<wd l="6797" t="9350" r="7450" b="9533">Chapter</wd>

<space/>

<wd l="7522" t="9350" r="7704" b="9533">of</wd>

<space/>

<wd l="7757" t="9350" r="7992" b="9494">the</wd>

<space/>

<wd l="8059" t="9355" r="9000" b="9494">Association</wd>

<space/>

<wd l="9086" t="9350" r="9269" b="9533">of</wd>

<space/>

<wd l="9326" t="9350" r="10512" b="9533">Computational</wd>

<space/>

</ln>

<ln l="6360" t="9571" r="10507" b="9754" baseLine="9706" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6360" t="9576" r="7267" b="9754">Linguistics,</wd>

<space/>

<wd l="7378" t="9571" r="8390" b="9754">Proceedings,</wd>

<space/>

<wd l="8496" t="9576" r="8866" b="9715">June</wd>

<space/>

<wd l="8947" t="9571" r="9341" b="9739">9-14,</wd>

<space/>

<wd l="9451" t="9571" r="9878" b="9739">2013,</wd>

<space/>

<wd l="9998" t="9576" r="10507" b="9715">Westin</wd>

<space/>

</ln>

<ln l="6365" t="9787" r="10502" b="9970" baseLine="9926">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6365" t="9787" r="7142" b="9931">Peachtree</wd>

<space/>

<wd l="7267" t="9787" r="7718" b="9946">Plaza</wd>

<space/>

<wd l="7838" t="9787" r="8314" b="9955">Hotel,</wd>

<space/>

<wd l="8458" t="9787" r="9091" b="9955">Atlanta,</wd>

<space/>

<wd l="9259" t="9792" r="9936" b="9970">Georgia,</wd>

<space/>

</run>

<wd l="10114" t="9792" r="10502" b="9955"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">USA</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

</ln>

<ln l="6365" t="10008" r="7603" b="10195" baseLine="10142" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6365" t="10051" r="6811" b="10195">pages</wd>

<space/>

<wd l="6869" t="10008" r="7603" b="10152">471–481.</wd>

</ln>

</para>

</column>

</section>

<dd l="5738" t="15746" r="6233" b="15975">

<para l="5771" t="15792" r="6200" b="15946" alignment="left" lsp="exactly" lspExact="223" language="en">

<ln l="5837" t="15792" r="6134" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="950" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="15">

<wd l="5837" t="15792" r="6134" b="15946">110</wd>

</ln>

</para>

</dd>

</body>

</page>

</document>

