<?xml version="1.0" encoding="UTF-16"?>

<!--XML document generated using OCR technology from Nuance Communications, Inc.-->

<document xmlns="http://www.scansoft.com/omnipage/xml/ssdoc-schema3.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4318.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1433" marginTop="1340" marginRight="1378" marginBottom="358" offsetX="-26" offsetY="12" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1433" t="1340" r="10531" b="2244">

<column l="1433" t="1340" r="10531" b="2244">

<para l="1613" t="1406" r="10334" b="1987" alignment="centered" spaceBefore="17" spaceAfter="231" lsp="exactly" lspExact="322" language="en">

<ln l="1613" t="1406" r="10334" b="1670" baseLine="1603" bold="true" underlined="none" subsuperscript="none" fontSize="1450" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="1613" t="1406" r="3053" b="1618">USZEGED:</wd>

<space/>

<wd l="3158" t="1406" r="4483" b="1618">Correction</wd>

<space/>

<wd l="4560" t="1406" r="6278" b="1670">Type-sensitive</wd>

<space/>

<wd l="6350" t="1406" r="8122" b="1618">Normalization</wd>

<space/>

<wd l="8198" t="1406" r="8453" b="1613">of</wd>

<space/>

<wd l="8506" t="1406" r="9422" b="1670">English</wd>

<space/>

<wd l="9504" t="1411" r="10334" b="1613">Tweets
</wd>

</ln>

<ln l="3355" t="1723" r="8592" b="1987" baseLine="1925" bold="true" underlined="none" subsuperscript="none" fontSize="1450" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="3355" t="1723" r="4051" b="1987">Using</wd>

<space/>

<wd l="4128" t="1723" r="5381" b="1987">Efficiently</wd>

<space/>

<wd l="5462" t="1728" r="6442" b="1930">Indexed</wd>

<space/>

<wd l="6523" t="1786" r="7421" b="1987">n-gram</wd>

<space/>

<wd l="7507" t="1723" r="8592" b="1934">Statistics</wd>

</ln>

</para>

</column>

</section>

<section l="2261" t="2244" r="10109" b="3653">

<column l="2261" t="2244" r="5707" b="3653">

<para l="2261" t="2285" r="5702" b="3648" alignment="centered" lsp="exactly" lspExact="279" language="en">

<ln l="3254" t="2285" r="4714" b="2467" baseLine="2458" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Courier New" fontFamily="roman" fontPitch="variable" spacing="-9" forcedEOF="true">

<wd l="3254" t="2285" r="3917" b="2467">Gábor</wd>

<space/>

<wd l="3974" t="2299" r="4714" b="2467">Berend
</wd>

</ln>

<ln l="2981" t="2573" r="4987" b="2794" baseLine="2736" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Courier New" fontFamily="roman" fontPitch="variable" spacing="-9" forcedEOF="true">

<wd l="2981" t="2578" r="3974" b="2794">University</wd>

<space/>

<wd l="4046" t="2578" r="4258" b="2746">of</wd>

<space/>

<wd l="4310" t="2578" r="4987" b="2794">Szeged
</wd>

</ln>

<ln l="2707" t="2851" r="5251" b="3072" baseLine="3019" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Courier New" fontFamily="roman" fontPitch="variable" spacing="-9" forcedEOF="true">

<wd l="2707" t="2861" r="3840" b="3072">Department</wd>

<space/>

<wd l="3902" t="2856" r="4114" b="3024">of</wd>

<space/>

<wd l="4157" t="2856" r="5251" b="3024">Informatics
</wd>

</ln>

<ln l="2261" t="3106" r="5702" b="3370" baseLine="3315" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Courier New" fontFamily="roman" fontPitch="variable" spacing="-9" forcedEOF="true">

<wd l="2261" t="3106" r="2856" b="3370">Árpád</wd>

<space/>

<wd l="2918" t="3154" r="3173" b="3322">tér</wd>

<space/>

<wd l="3235" t="3154" r="3456" b="3350">2.,</wd>

<space/>

<wd l="3533" t="3149" r="4003" b="3322">6720</wd>

<space/>

<wd l="4075" t="3154" r="4800" b="3370">Szeged,</wd>

<space/>

<wd l="4872" t="3158" r="5702" b="3370">Hungary
</wd>

</ln>

<ln l="2338" t="3446" r="5621" b="3648" baseLine="3595" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Courier New" fontFamily="modern" fontPitch="fixed" spacing="-9" forcedEOF="true">

<wd l="2338" t="3446" r="5621" b="3648">berendg@inf.u-szeged.hu</wd>

</ln>

</para>

</column>

<column l="5832" t="2244" r="10109" b="3653">

<para l="5832" t="2285" r="10104" b="3648" alignment="centered" lsp="exactly" lspExact="279" language="en">

<ln l="7243" t="2285" r="8683" b="2467" baseLine="2458" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Courier New" fontFamily="roman" fontPitch="variable" spacing="-3" forcedEOF="true">

<wd l="7243" t="2294" r="7824" b="2467">Ervin</wd>

<space/>

<wd l="7891" t="2285" r="8683" b="2467">Tasnádi
</wd>

</ln>

<ln l="6965" t="2578" r="8971" b="2794" baseLine="2736" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Courier New" fontFamily="roman" fontPitch="variable" spacing="-3" forcedEOF="true">

<wd l="6965" t="2578" r="7958" b="2794">University</wd>

<space/>

<wd l="8030" t="2578" r="8242" b="2746">of</wd>

<space/>

<wd l="8294" t="2578" r="8971" b="2794">Szeged
</wd>

</ln>

<ln l="6691" t="2856" r="9235" b="3072" baseLine="3019" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Courier New" fontFamily="roman" fontPitch="variable" spacing="-3" forcedEOF="true">

<wd l="6691" t="2861" r="7824" b="3072">Department</wd>

<space/>

<wd l="7886" t="2856" r="8098" b="3024">of</wd>

<space/>

<wd l="8141" t="2856" r="9235" b="3024">Informatics
</wd>

</ln>

<ln l="6245" t="3106" r="9686" b="3370" baseLine="3315" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Courier New" fontFamily="roman" fontPitch="variable" spacing="-3" forcedEOF="true">

<wd l="6245" t="3106" r="6840" b="3370">Árpád</wd>

<space/>

<wd l="6902" t="3154" r="7157" b="3322">tér</wd>

<space/>

<wd l="7219" t="3154" r="7440" b="3350">2.,</wd>

<space/>

<wd l="7517" t="3149" r="7987" b="3322">6720</wd>

<space/>

<wd l="8059" t="3154" r="8784" b="3370">Szeged,</wd>

<space/>

<wd l="8856" t="3158" r="9686" b="3370">Hungary
</wd>

</ln>

<ln l="5832" t="3442" r="10104" b="3648" baseLine="3595" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Courier New" fontFamily="modern" fontPitch="fixed" spacing="-3" forcedEOF="true">

<wd l="5832" t="3446" r="10104" b="3648">Tasnadi.Ervin@stud.u-szeged.hu</wd>

</ln>

</para>

</column>

</section>

<section l="1433" t="4092" r="10531" b="15316">

<column l="1433" t="4092" r="5820" b="15316">

<para l="3178" t="4142" r="4070" b="4315" alignment="centered" spaceBefore="3" lsp="exactly" lspExact="273" language="en">

<ln l="3178" t="4142" r="4070" b="4315" baseLine="4306" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="3178" t="4142" r="4070" b="4315">Abstract</wd>

</ln>

</para>

<para l="1781" t="4666" r="5477" b="10018" alignment="justified" li="288" ri="360" spaceBefore="233" lsp="exactly" lspExact="271" language="en">

<ln l="1781" t="4666" r="5453" b="4867" baseLine="4814" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1781" t="4666" r="2160" b="4824">This</wd>

<space/>

<wd l="2270" t="4718" r="2760" b="4867">paper</wd>

<space/>

<wd l="2861" t="4666" r="3658" b="4824">describes</wd>

<space/>

<wd l="3768" t="4666" r="4032" b="4824">the</wd>

<space/>

<wd l="4138" t="4666" r="5088" b="4824">framework</wd>

<space/>

<wd l="5194" t="4718" r="5453" b="4867">ap-</wd>

</ln>

<ln l="1781" t="4934" r="5462" b="5136" baseLine="5088" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1781" t="4934" r="2218" b="5136">plied</wd>

<space/>

<wd l="2290" t="4934" r="2506" b="5136">by</wd>

<space/>

<wd l="2582" t="4958" r="3010" b="5093">team</wd>

<space/>

<wd l="3077" t="4939" r="4070" b="5093">USZEGED</wd>

<space/>

<wd l="4152" t="4958" r="4306" b="5093">at</wd>

<space/>

<wd l="4378" t="4934" r="4642" b="5093">the</wd>

<space/>

<wd l="4723" t="4934" r="5462" b="5093">“Lexical</wd>

<space/>

</ln>

<ln l="1781" t="5208" r="5467" b="5410" baseLine="5357" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1781" t="5208" r="3029" b="5366">Normalisation</wd>

<space/>

<wd l="3086" t="5208" r="3346" b="5366">for</wd>

<space/>

<wd l="3403" t="5208" r="4070" b="5410">English</wd>

<space/>

<wd l="4128" t="5213" r="4829" b="5366">Tweets”</wd>

<space/>

<wd l="4906" t="5208" r="5467" b="5366">shared</wd>

<space/>

</ln>

<ln l="1781" t="5477" r="5453" b="5678" baseLine="5630" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1781" t="5477" r="2174" b="5635">task.</wd>

<space/>

<wd l="2314" t="5482" r="2650" b="5635">Our</wd>

<space/>

<wd l="2722" t="5477" r="3518" b="5678">approach</wd>

<space/>

<wd l="3595" t="5477" r="3931" b="5635">first</wd>

<space/>

<wd l="4008" t="5477" r="4733" b="5678">employs</wd>

<space/>

<wd l="4814" t="5530" r="4910" b="5635">a</wd>

<space/>

<wd l="4987" t="5482" r="5453" b="5635">CRF-</wd>

</ln>

<ln l="1781" t="5750" r="5453" b="5952" baseLine="5899" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1781" t="5750" r="2280" b="5909">based</wd>

<space/>

<wd l="2342" t="5803" r="3134" b="5952">sequence</wd>

<space/>

<wd l="3192" t="5750" r="3893" b="5952">labeling</wd>

<space/>

<wd l="3950" t="5750" r="4901" b="5909">framework</wd>

<space/>

<wd l="4958" t="5774" r="5126" b="5909">to</wd>

<space/>

<wd l="5194" t="5750" r="5453" b="5909">de-</wd>

</ln>

<ln l="1786" t="6019" r="5453" b="6178" baseLine="6173" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1786" t="6019" r="2141" b="6178">cide</wd>

<space/>

<wd l="2237" t="6019" r="2506" b="6178">the</wd>

<space/>

<wd l="2597" t="6019" r="2986" b="6178">kind</wd>

<space/>

<wd l="3086" t="6019" r="3274" b="6178">of</wd>

<space/>

<wd l="3360" t="6019" r="4315" b="6178">corrections</wd>

<space/>

<wd l="4421" t="6019" r="4685" b="6178">the</wd>

<space/>

<wd l="4781" t="6019" r="5453" b="6178">individ-</wd>

</ln>

<ln l="1781" t="6293" r="5453" b="6494" baseLine="6442" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1781" t="6293" r="2045" b="6451">ual</wd>

<space/>

<wd l="2107" t="6293" r="2669" b="6451">tokens</wd>

<space/>

<wd l="2736" t="6293" r="3398" b="6494">require,</wd>

<space/>

<wd l="3475" t="6293" r="3850" b="6451">then</wd>

<space/>

<wd l="3912" t="6293" r="4690" b="6494">performs</wd>

<space/>

<wd l="4762" t="6293" r="5026" b="6451">the</wd>

<space/>

<wd l="5088" t="6346" r="5453" b="6451">nec-</wd>

</ln>

<ln l="1786" t="6562" r="5462" b="6763" baseLine="6715" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1786" t="6614" r="2323" b="6763">essary</wd>

<space/>

<wd l="2429" t="6562" r="3610" b="6720">modifications</wd>

<space/>

<wd l="3720" t="6562" r="4334" b="6763">relying</wd>

<space/>

<wd l="4450" t="6614" r="4661" b="6720">on</wd>

<space/>

<wd l="4771" t="6562" r="5462" b="6720">external</wd>

<space/>

</ln>

<ln l="1781" t="6835" r="5453" b="6994" baseLine="6984" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1781" t="6835" r="2496" b="6994">lexicons</wd>

<space/>

<wd l="2582" t="6835" r="2894" b="6994">and</wd>

<space/>

<wd l="2976" t="6888" r="3072" b="6994">a</wd>

<space/>

<wd l="3144" t="6835" r="3835" b="6994">massive</wd>

<space/>

<wd l="3917" t="6835" r="4776" b="6994">collection</wd>

<space/>

<wd l="4853" t="6835" r="5045" b="6994">of</wd>

<space/>

<wd l="5112" t="6835" r="5453" b="6994">effi-</wd>

</ln>

<ln l="1786" t="7104" r="5453" b="7306" baseLine="7253" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1786" t="7104" r="2371" b="7306">ciently</wd>

<space/>

<wd l="2434" t="7104" r="3115" b="7262">indexed</wd>

<space/>

<wd l="3173" t="7157" r="3806" b="7306">n-gram</wd>

<space/>

<wd l="3869" t="7104" r="4603" b="7262">statistics</wd>

<space/>

<wd l="4670" t="7104" r="5098" b="7262">from</wd>

<space/>

<wd l="5150" t="7109" r="5453" b="7258">En-</wd>

</ln>

<ln l="1786" t="7378" r="5467" b="7579" baseLine="7526" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1786" t="7378" r="2208" b="7579">glish</wd>

<space/>

<wd l="2309" t="7402" r="2909" b="7536">tweets.</wd>

<space/>

<wd l="3144" t="7382" r="3485" b="7536">Our</wd>

<space/>

<wd l="3595" t="7378" r="4291" b="7536">solution</wd>

<space/>

<wd l="4392" t="7378" r="4531" b="7536">is</wd>

<space/>

<wd l="4646" t="7378" r="5146" b="7536">based</wd>

<space/>

<wd l="5251" t="7430" r="5467" b="7536">on</wd>

<space/>

</ln>

<ln l="1781" t="7646" r="5477" b="7848" baseLine="7795" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1781" t="7646" r="2045" b="7805">the</wd>

<space/>

<wd l="2150" t="7646" r="3139" b="7848">assumption</wd>

<space/>

<wd l="3235" t="7646" r="3566" b="7805">that</wd>

<space/>

<wd l="3662" t="7646" r="4090" b="7805">from</wd>

<space/>

<wd l="4181" t="7646" r="4450" b="7805">the</wd>

<space/>

<wd l="4550" t="7670" r="5189" b="7805">context</wd>

<space/>

<wd l="5290" t="7646" r="5477" b="7805">of</wd>

<space/>

</ln>

<ln l="1781" t="7915" r="5453" b="8117" baseLine="8069" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1781" t="7915" r="2045" b="8074">the</wd>

<space/>

<wd l="2146" t="7920" r="2597" b="8074">OOV</wd>

<space/>

<wd l="2693" t="7915" r="3269" b="8102">words,</wd>

<space/>

<wd l="3384" t="7915" r="3509" b="8074">it</wd>

<space/>

<wd l="3600" t="7915" r="3734" b="8074">is</wd>

<space/>

<wd l="3835" t="7915" r="4550" b="8117">possible</wd>

<space/>

<wd l="4646" t="7939" r="4810" b="8074">to</wd>

<space/>

<wd l="4910" t="7968" r="5453" b="8074">recon-</wd>

</ln>

<ln l="1790" t="8189" r="5458" b="8390" baseLine="8338" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1790" t="8213" r="2266" b="8347">struct</wd>

<space/>

<wd l="2333" t="8189" r="2530" b="8347">its</wd>

<space/>

<wd l="2606" t="8194" r="2832" b="8347">IV</wd>

<space/>

<wd l="2909" t="8189" r="3845" b="8390">equivalent,</wd>

<space/>

<wd l="3936" t="8242" r="4104" b="8347">as</wd>

<space/>

<wd l="4181" t="8189" r="4613" b="8347">there</wd>

<space/>

<wd l="4690" t="8242" r="4949" b="8347">are</wd>

<space/>

<wd l="5016" t="8242" r="5458" b="8347">users</wd>

<space/>

</ln>

<ln l="1781" t="8458" r="5462" b="8659" baseLine="8611" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1781" t="8458" r="2150" b="8616">who</wd>

<space/>

<wd l="2222" t="8510" r="2515" b="8616">use</wd>

<space/>

<wd l="2582" t="8458" r="2846" b="8616">the</wd>

<space/>

<wd l="2928" t="8458" r="3658" b="8616">standard</wd>

<space/>

<wd l="3725" t="8458" r="4392" b="8659">English</wd>

<space/>

<wd l="4459" t="8458" r="4886" b="8616">form</wd>

<space/>

<wd l="4954" t="8458" r="5146" b="8616">of</wd>

<space/>

<wd l="5198" t="8458" r="5462" b="8616">the</wd>

<space/>

</ln>

<ln l="1786" t="8731" r="5467" b="8890" baseLine="8880" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1786" t="8736" r="2237" b="8890">OOV</wd>

<space/>

<wd l="2314" t="8731" r="2760" b="8890">word</wd>

<space/>

<wd l="2832" t="8731" r="3389" b="8890">within</wd>

<space/>

<wd l="3461" t="8731" r="3725" b="8890">the</wd>

<space/>

<wd l="3806" t="8784" r="4243" b="8890">same</wd>

<space/>

<wd l="4320" t="8755" r="4997" b="8890">context.</wd>

<space/>

<wd l="5131" t="8736" r="5467" b="8890">Our</wd>

<space/>

</ln>

<ln l="1786" t="9000" r="5453" b="9202" baseLine="9154" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1786" t="9000" r="2582" b="9202">approach</wd>

<space/>

<wd l="2669" t="9000" r="3432" b="9158">achieved</wd>

<space/>

<wd l="3518" t="9053" r="3720" b="9158">an</wd>

<space/>

<wd l="3806" t="9005" r="4459" b="9158">F-score</wd>

<space/>

<wd l="4550" t="9000" r="4742" b="9158">of</wd>

<space/>

<wd l="4814" t="9000" r="5453" b="9187">0.8052,</wd>

<space/>

</ln>

<ln l="1781" t="9274" r="5453" b="9475" baseLine="9422" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1781" t="9274" r="2261" b="9475">being</wd>

<space/>

<wd l="2338" t="9274" r="2602" b="9432">the</wd>

<space/>

<wd l="2688" t="9274" r="3283" b="9432">second</wd>

<space/>

<wd l="3355" t="9274" r="3710" b="9432">best</wd>

<space/>

<wd l="3787" t="9326" r="4094" b="9432">one</wd>

<space/>

<wd l="4171" t="9326" r="4757" b="9475">among</wd>

<space/>

<wd l="4834" t="9274" r="5102" b="9432">the</wd>

<space/>

<wd l="5174" t="9326" r="5453" b="9432">un-</wd>

</ln>

<ln l="1786" t="9542" r="5467" b="9744" baseLine="9696" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1786" t="9542" r="2789" b="9701">constrained</wd>

<space/>

<wd l="2856" t="9542" r="3960" b="9730">submissions,</wd>

<space/>

<wd l="4037" t="9542" r="4301" b="9701">the</wd>

<space/>

<wd l="4368" t="9566" r="5107" b="9744">category</wd>

<space/>

<wd l="5179" t="9595" r="5467" b="9701">our</wd>

<space/>

</ln>

<ln l="1790" t="9816" r="4171" b="10018" baseLine="9965" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1790" t="9816" r="2765" b="9974">submission</wd>

<space/>

<wd l="2822" t="9816" r="3163" b="9974">also</wd>

<space/>

<wd l="3221" t="9816" r="3893" b="10018">belongs</wd>

<space/>

<wd l="3955" t="9840" r="4171" b="9974">to.</wd>

</ln>

</para>

<para l="1454" t="10315" r="3091" b="10488" alignment="left" spaceBefore="251" lsp="exactly" lspExact="273" language="en">

<ln l="1454" t="10315" r="3091" b="10488" baseLine="10483" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="10">

<wd l="1454" t="10315" r="1550" b="10483">1</wd>

<space/>

<wd l="1805" t="10315" r="3091" b="10488">Introduction</wd>

</ln>

</para>

<para l="1440" t="10771" r="5808" b="15307" alignment="justified" spaceBefore="163" lsp="exactly" lspExact="270" language="en">

<ln l="1445" t="10771" r="5808" b="10930" baseLine="10920" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="10771" r="1982" b="10930">Social</wd>

<space/>

<wd l="2035" t="10771" r="2573" b="10930">media</wd>

<space/>

<wd l="2616" t="10771" r="2755" b="10930">is</wd>

<space/>

<wd l="2818" t="10824" r="2914" b="10930">a</wd>

<space/>

<wd l="2957" t="10771" r="3298" b="10930">rich</wd>

<space/>

<wd l="3355" t="10824" r="3912" b="10930">source</wd>

<space/>

<wd l="3970" t="10771" r="4157" b="10930">of</wd>

<space/>

<wd l="4195" t="10771" r="5222" b="10930">information</wd>

<space/>

<wd l="5270" t="10771" r="5808" b="10930">which</wd>

<space/>

</ln>

<ln l="1440" t="11040" r="5794" b="11242" baseLine="11194" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="11040" r="1723" b="11198">has</wd>

<space/>

<wd l="1814" t="11040" r="2227" b="11198">been</wd>

<space/>

<wd l="2309" t="11093" r="2909" b="11242">proven</wd>

<space/>

<wd l="2990" t="11064" r="3158" b="11198">to</wd>

<space/>

<wd l="3245" t="11040" r="3446" b="11198">be</wd>

<space/>

<wd l="3533" t="11040" r="4061" b="11198">useful</wd>

<space/>

<wd l="4147" t="11064" r="4315" b="11198">to</wd>

<space/>

<wd l="4406" t="11093" r="4502" b="11198">a</wd>

<space/>

<wd l="4579" t="11040" r="5174" b="11242">variety</wd>

<space/>

<wd l="5266" t="11040" r="5458" b="11198">of</wd>

<space/>

<wd l="5530" t="11093" r="5794" b="11242">ap-</wd>

</ln>

<ln l="1440" t="11314" r="5794" b="11515" baseLine="11462" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="11314" r="2333" b="11515">plications,</wd>

<space/>

<wd l="2419" t="11314" r="2813" b="11472">such</wd>

<space/>

<wd l="2880" t="11366" r="3048" b="11472">as</wd>

<space/>

<wd l="3125" t="11338" r="3586" b="11472">event</wd>

<space/>

<wd l="3658" t="11314" r="4522" b="11472">extraction</wd>

<space/>

<wd l="4594" t="11314" r="5246" b="11510">(Sakaki</wd>

<space/>

<wd l="5323" t="11338" r="5477" b="11472">et</wd>

<space/>

<wd l="5544" t="11314" r="5794" b="11501">al.,</wd>

<space/>

</ln>

<ln l="1445" t="11582" r="5794" b="11779" baseLine="11736" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="11587" r="1925" b="11770">2010;</wd>

<space/>

<wd l="2083" t="11582" r="2587" b="11741">Ritter</wd>

<space/>

<wd l="2702" t="11606" r="2856" b="11741">et</wd>

<space/>

<wd l="2976" t="11582" r="3226" b="11770">al.,</wd>

<space/>

<wd l="3374" t="11587" r="3854" b="11770">2012;</wd>

<space/>

<wd l="4013" t="11582" r="4512" b="11741">Ritter</wd>

<space/>

<wd l="4632" t="11606" r="4786" b="11741">et</wd>

<space/>

<wd l="4906" t="11582" r="5155" b="11770">al.,</wd>

<space/>

<wd l="5299" t="11582" r="5794" b="11779">2015)</wd>

<space/>

</ln>

<ln l="1445" t="11856" r="5794" b="12058" baseLine="12005" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="11909" r="1627" b="12014">or</wd>

<space/>

<wd l="1699" t="11856" r="2150" b="12014">trend</wd>

<space/>

<wd l="2232" t="11856" r="3072" b="12043">detection,</wd>

<space/>

<wd l="3163" t="11856" r="3984" b="12058">including</wd>

<space/>

<wd l="4066" t="11856" r="4334" b="12014">the</wd>

<space/>

<wd l="4411" t="11856" r="5126" b="12058">tracking</wd>

<space/>

<wd l="5213" t="11856" r="5400" b="12014">of</wd>

<space/>

<wd l="5472" t="11856" r="5794" b="12058">epi-</wd>

</ln>

<ln l="1445" t="12125" r="5798" b="12326" baseLine="12278" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="12125" r="2050" b="12283">demics</wd>

<space/>

<wd l="2170" t="12125" r="2736" b="12322">(Lamb</wd>

<space/>

<wd l="2842" t="12149" r="3000" b="12283">et</wd>

<space/>

<wd l="3101" t="12125" r="3350" b="12312">al.,</wd>

<space/>

<wd l="3480" t="12130" r="4027" b="12322">2013).</wd>

<space/>

<wd l="4238" t="12125" r="5146" b="12326">Analyzing</wd>

<space/>

<wd l="5246" t="12149" r="5798" b="12283">tweets</wd>

<space/>

</ln>

<ln l="1440" t="12398" r="5794" b="12600" baseLine="12547" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="12398" r="1613" b="12552">in</wd>

<space/>

<wd l="1685" t="12398" r="2366" b="12600">general,</wd>

<space/>

<wd l="2448" t="12398" r="3221" b="12586">however,</wd>

<space/>

<wd l="3307" t="12451" r="3605" b="12557">can</wd>

<space/>

<wd l="3677" t="12451" r="4075" b="12600">pose</wd>

<space/>

<wd l="4152" t="12398" r="4752" b="12557">several</wd>

<space/>

<wd l="4829" t="12398" r="5794" b="12557">difficulties.</wd>

<space/>

</ln>

<ln l="1440" t="12667" r="5803" b="12869" baseLine="12816" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="12672" r="1915" b="12826">From</wd>

<space/>

<wd l="1982" t="12720" r="2179" b="12826">an</wd>

<space/>

<wd l="2251" t="12667" r="3269" b="12869">engineering</wd>

<space/>

<wd l="3336" t="12667" r="3787" b="12869">point</wd>

<space/>

<wd l="3854" t="12667" r="4042" b="12826">of</wd>

<space/>

<wd l="4094" t="12667" r="4541" b="12854">view,</wd>

<space/>

<wd l="4618" t="12667" r="4882" b="12826">the</wd>

<space/>

<wd l="4954" t="12667" r="5803" b="12869">streaming</wd>

<space/>

</ln>

<ln l="1440" t="12941" r="5798" b="13142" baseLine="13090" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="12965" r="1982" b="13099">nature</wd>

<space/>

<wd l="2069" t="12941" r="2256" b="13099">of</wd>

<space/>

<wd l="2318" t="12965" r="2870" b="13099">tweets</wd>

<space/>

<wd l="2952" t="12941" r="3648" b="13142">requires</wd>

<space/>

<wd l="3734" t="12941" r="4061" b="13099">that</wd>

<space/>

<wd l="4147" t="12941" r="4742" b="13142">special</wd>

<space/>

<wd l="4824" t="12941" r="5582" b="13099">attention</wd>

<space/>

<wd l="5659" t="12941" r="5798" b="13099">is</wd>

<space/>

</ln>

<ln l="1440" t="13210" r="5808" b="13411" baseLine="13358" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="13210" r="1819" b="13411">paid</wd>

<space/>

<wd l="1906" t="13234" r="2074" b="13368">to</wd>

<space/>

<wd l="2165" t="13210" r="2434" b="13368">the</wd>

<space/>

<wd l="2534" t="13210" r="3418" b="13411">scalability</wd>

<space/>

<wd l="3514" t="13210" r="3706" b="13368">of</wd>

<space/>

<wd l="3782" t="13210" r="4046" b="13368">the</wd>

<space/>

<wd l="4142" t="13210" r="5064" b="13411">algorithms</wd>

<space/>

<wd l="5170" t="13210" r="5808" b="13411">applied</wd>

<space/>

</ln>

<ln l="1445" t="13478" r="5794" b="13680" baseLine="13632" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="13478" r="1757" b="13637">and</wd>

<space/>

<wd l="1848" t="13478" r="2275" b="13637">from</wd>

<space/>

<wd l="2366" t="13531" r="2563" b="13637">an</wd>

<space/>

<wd l="2659" t="13483" r="3067" b="13637">NLP</wd>

<space/>

<wd l="3163" t="13478" r="3610" b="13680">point</wd>

<space/>

<wd l="3706" t="13478" r="3898" b="13637">of</wd>

<space/>

<wd l="3974" t="13478" r="4421" b="13666">view,</wd>

<space/>

<wd l="4531" t="13478" r="4800" b="13637">the</wd>

<space/>

<wd l="4896" t="13478" r="5342" b="13637">often</wd>

<space/>

<wd l="5438" t="13478" r="5794" b="13637">sub-</wd>

</ln>

<ln l="1450" t="13752" r="5798" b="13910" baseLine="13901" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1450" t="13752" r="2179" b="13910">standard</wd>

<space/>

<wd l="2237" t="13752" r="3475" b="13910">characteristics</wd>

<space/>

<wd l="3538" t="13752" r="3730" b="13910">of</wd>

<space/>

<wd l="3782" t="13752" r="4277" b="13910">social</wd>

<space/>

<wd l="4334" t="13752" r="4872" b="13910">media</wd>

<space/>

<wd l="4920" t="13776" r="5798" b="13910">utterances</wd>

<space/>

</ln>

<ln l="1440" t="14021" r="5808" b="14179" baseLine="14174" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="14021" r="1723" b="14179">has</wd>

<space/>

<wd l="1786" t="14045" r="1954" b="14179">to</wd>

<space/>

<wd l="2011" t="14021" r="2218" b="14179">be</wd>

<space/>

<wd l="2280" t="14021" r="3178" b="14179">addressed.</wd>

<space/>

<wd l="3264" t="14021" r="3600" b="14179">The</wd>

<space/>

<wd l="3658" t="14021" r="3984" b="14179">fact</wd>

<space/>

<wd l="4037" t="14021" r="4368" b="14179">that</wd>

<space/>

<wd l="4421" t="14045" r="4973" b="14179">tweets</wd>

<space/>

<wd l="5040" t="14074" r="5299" b="14179">are</wd>

<space/>

<wd l="5362" t="14021" r="5808" b="14179">often</wd>

<space/>

</ln>

<ln l="1440" t="14294" r="5798" b="14453" baseLine="14443" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="14294" r="2059" b="14453">written</wd>

<space/>

<wd l="2117" t="14347" r="2333" b="14453">on</wd>

<space/>

<wd l="2386" t="14294" r="2990" b="14453">mobile</wd>

<space/>

<wd l="3053" t="14294" r="3686" b="14453">devices</wd>

<space/>

<wd l="3758" t="14294" r="4070" b="14453">and</wd>

<space/>

<wd l="4128" t="14347" r="4387" b="14453">are</wd>

<space/>

<wd l="4445" t="14294" r="5194" b="14453">informal</wd>

<space/>

<wd l="5251" t="14294" r="5798" b="14453">makes</wd>

<space/>

</ln>

<ln l="1440" t="14563" r="5794" b="14765" baseLine="14717" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="14563" r="1704" b="14722">the</wd>

<space/>

<wd l="1752" t="14563" r="2755" b="14765">misspelling</wd>

<space/>

<wd l="2803" t="14563" r="3115" b="14722">and</wd>

<space/>

<wd l="3163" t="14563" r="4320" b="14722">abbreviations</wd>

<space/>

<wd l="4378" t="14563" r="4570" b="14722">of</wd>

<space/>

<wd l="4598" t="14563" r="5122" b="14722">words</wd>

<space/>

<wd l="5174" t="14563" r="5486" b="14722">and</wd>

<space/>

<wd l="5534" t="14616" r="5794" b="14722">ex-</wd>

</ln>

<ln l="1440" t="14837" r="5803" b="15038" baseLine="14986" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="14837" r="2294" b="15038">pressions,</wd>

<space/>

<wd l="2390" t="14890" r="2558" b="14995">as</wd>

<space/>

<wd l="2640" t="14837" r="3010" b="14995">well</wd>

<space/>

<wd l="3091" t="14890" r="3259" b="14995">as</wd>

<space/>

<wd l="3341" t="14837" r="3610" b="14995">the</wd>

<space/>

<wd l="3682" t="14890" r="3970" b="14995">use</wd>

<space/>

<wd l="4051" t="14837" r="4238" b="14995">of</wd>

<space/>

<wd l="4306" t="14837" r="4982" b="14995">creative</wd>

<space/>

<wd l="5054" t="14837" r="5803" b="14995">informal</wd>

<space/>

</ln>

<ln l="1440" t="15106" r="5808" b="15307" baseLine="15259" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="15106" r="2227" b="15307">language</wd>

<space/>

<wd l="2280" t="15106" r="3125" b="15307">prevalent,</wd>

<space/>

<wd l="3192" t="15106" r="3734" b="15307">giving</wd>

<space/>

<wd l="3792" t="15106" r="4104" b="15264">rise</wd>

<space/>

<wd l="4157" t="15130" r="4325" b="15264">to</wd>

<space/>

<wd l="4387" t="15158" r="4483" b="15264">a</wd>

<space/>

<wd l="4531" t="15106" r="5093" b="15307">higher</wd>

<space/>

<wd l="5141" t="15106" r="5808" b="15264">number</wd>

</ln>

</para>

</column>

<column l="6144" t="4092" r="10531" b="15316">

<para l="6149" t="4157" r="10517" b="4627" alignment="justified" spaceBefore="1" lsp="exactly" lspExact="271" language="en">

<ln l="6149" t="4157" r="10517" b="4358" baseLine="4306" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="4157" r="6341" b="4315">of</wd>

<space/>

<wd l="6413" t="4157" r="7973" b="4358">out-of-vocabulary</wd>

<space/>

<wd l="8064" t="4162" r="8654" b="4354">(OOV)</wd>

<space/>

<wd l="8746" t="4157" r="9269" b="4315">words</wd>

<space/>

<wd l="9355" t="4157" r="9734" b="4315">than</wd>

<space/>

<wd l="9811" t="4157" r="9984" b="4310">in</wd>

<space/>

<wd l="10066" t="4157" r="10517" b="4315">other</wd>

<space/>

</ln>

<ln l="6149" t="4435" r="6758" b="4627" baseLine="4579" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="4478" r="6758" b="4627">genres.</wd>

</ln>

</para>

<para l="6144" t="4901" r="7930" b="5074" alignment="left" spaceBefore="218" lsp="exactly" lspExact="273" language="en">

<ln l="6144" t="4901" r="7930" b="5074" baseLine="5064" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="10">

<wd l="6144" t="4901" r="6259" b="5069">2</wd>

<space/>

<wd l="6509" t="4906" r="7286" b="5074">Related</wd>

<space/>

<wd l="7349" t="4906" r="7930" b="5074">Work</wd>

</ln>

</para>

<para l="6144" t="5342" r="10512" b="7709" alignment="justified" spaceBefore="152" lsp="exactly" lspExact="271" language="en">

<ln l="6144" t="5342" r="10502" b="5544" baseLine="5491" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="5342" r="6485" b="5501">The</wd>

<space/>

<wd l="6581" t="5342" r="7330" b="5501">informal</wd>

<space/>

<wd l="7430" t="5342" r="8218" b="5544">language</wd>

<space/>

<wd l="8318" t="5342" r="8510" b="5501">of</wd>

<space/>

<wd l="8602" t="5342" r="9096" b="5501">social</wd>

<space/>

<wd l="9197" t="5342" r="9773" b="5530">media,</wd>

<space/>

<wd l="9893" t="5342" r="10502" b="5501">includ-</wd>

</ln>

<ln l="6144" t="5611" r="10507" b="5813" baseLine="5765" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="5611" r="6422" b="5813">ing</wd>

<space/>

<wd l="6509" t="5611" r="7166" b="5798">Twitter,</wd>

<space/>

<wd l="7267" t="5611" r="7406" b="5770">is</wd>

<space/>

<wd l="7502" t="5611" r="8366" b="5813">extremely</wd>

<space/>

<wd l="8453" t="5611" r="9754" b="5813">heterogeneous,</wd>

<space/>

<wd l="9854" t="5611" r="10507" b="5813">making</wd>

<space/>

</ln>

<ln l="6144" t="5885" r="10512" b="6086" baseLine="6034" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="5885" r="6346" b="6043">its</wd>

<space/>

<wd l="6432" t="5885" r="7512" b="6086">grammatical</wd>

<space/>

<wd l="7594" t="5885" r="8285" b="6086">analysis</wd>

<space/>

<wd l="8366" t="5938" r="8818" b="6043">more</wd>

<space/>

<wd l="8894" t="5885" r="9576" b="6043">difficult</wd>

<space/>

<wd l="9658" t="5885" r="10512" b="6086">compared</wd>

<space/>

</ln>

<ln l="6144" t="6154" r="10512" b="6355" baseLine="6302" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="6178" r="6312" b="6312">to</wd>

<space/>

<wd l="6394" t="6154" r="7123" b="6312">standard</wd>

<space/>

<wd l="7195" t="6206" r="7752" b="6355">genres</wd>

<space/>

<wd l="7838" t="6154" r="8227" b="6312">such</wd>

<space/>

<wd l="8299" t="6206" r="8472" b="6312">as</wd>

<space/>

<wd l="8544" t="6154" r="9418" b="6312">newswire.</wd>

<space/>

<wd l="9538" t="6158" r="9672" b="6312">It</wd>

<space/>

<wd l="9739" t="6154" r="10022" b="6312">has</wd>

<space/>

<wd l="10099" t="6154" r="10512" b="6312">been</wd>

<space/>

</ln>

<ln l="6154" t="6427" r="10502" b="6629" baseLine="6576" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6154" t="6427" r="6710" b="6586">shown</wd>

<space/>

<wd l="6758" t="6427" r="7704" b="6629">previously,</wd>

<space/>

<wd l="7766" t="6427" r="8098" b="6586">that</wd>

<space/>

<wd l="8146" t="6427" r="8410" b="6586">the</wd>

<space/>

<wd l="8462" t="6427" r="9562" b="6629">performance</wd>

<space/>

<wd l="9619" t="6427" r="9806" b="6586">of</wd>

<space/>

<wd l="9845" t="6427" r="10502" b="6629">linguis-</wd>

</ln>

<ln l="6144" t="6696" r="10502" b="6898" baseLine="6845" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="6696" r="6360" b="6854">tic</wd>

<space/>

<wd l="6470" t="6696" r="7277" b="6898">analyzers</wd>

<space/>

<wd l="7387" t="6696" r="7992" b="6854">trained</wd>

<space/>

<wd l="8098" t="6749" r="8314" b="6854">on</wd>

<space/>

<wd l="8419" t="6696" r="9149" b="6854">standard</wd>

<space/>

<wd l="9250" t="6720" r="9576" b="6854">text</wd>

<space/>

<wd l="9672" t="6720" r="10123" b="6898">types</wd>

<space/>

<wd l="10238" t="6696" r="10502" b="6854">de-</wd>

</ln>

<ln l="6149" t="6965" r="10512" b="7166" baseLine="7118" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="6965" r="6629" b="7166">grade</wd>

<space/>

<wd l="6686" t="6965" r="7392" b="7166">severely</wd>

<space/>

<wd l="7450" t="7018" r="7853" b="7123">once</wd>

<space/>

<wd l="7906" t="6965" r="8270" b="7166">they</wd>

<space/>

<wd l="8328" t="7018" r="8587" b="7123">are</wd>

<space/>

<wd l="8645" t="6965" r="9283" b="7166">applied</wd>

<space/>

<wd l="9326" t="6989" r="9494" b="7123">to</wd>

<space/>

<wd l="9547" t="6989" r="9946" b="7123">texts</wd>

<space/>

<wd l="10003" t="6965" r="10512" b="7123">found</wd>

<space/>

</ln>

<ln l="6144" t="7238" r="10498" b="7440" baseLine="7387" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="7238" r="6317" b="7392">in</wd>

<space/>

<wd l="6418" t="7238" r="6917" b="7397">social</wd>

<space/>

<wd l="7013" t="7238" r="7589" b="7426">media,</wd>

<space/>

<wd l="7714" t="7238" r="8578" b="7440">especially</wd>

<space/>

<wd l="8674" t="7262" r="9226" b="7397">tweets</wd>

<space/>

<wd l="9336" t="7238" r="9902" b="7435">(Ritter</wd>

<space/>

<wd l="9998" t="7262" r="10152" b="7397">et</wd>

<space/>

<wd l="10248" t="7238" r="10498" b="7426">al.,</wd>

<space/>

</ln>

<ln l="6149" t="7507" r="8832" b="7709" baseLine="7661" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="7512" r="6629" b="7694">2011;</wd>

<space/>

<wd l="6696" t="7507" r="7690" b="7709">Derczynski</wd>

<space/>

<wd l="7752" t="7531" r="7906" b="7666">et</wd>

<space/>

<wd l="7963" t="7507" r="8213" b="7694">al.,</wd>

<space/>

<wd l="8285" t="7512" r="8832" b="7704">2013).</wd>

</ln>

</para>

<para l="6144" t="7781" r="10512" b="11232" alignment="justified" fli="216" lsp="exactly" lspExact="271" language="en">

<ln l="6365" t="7781" r="10502" b="7982" baseLine="7934" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6365" t="7786" r="6547" b="7934">In</wd>

<space/>

<wd l="6619" t="7781" r="7080" b="7939">order</wd>

<space/>

<wd l="7142" t="7805" r="7306" b="7939">to</wd>

<space/>

<wd l="7378" t="7781" r="7824" b="7939">build</wd>

<space/>

<wd l="7891" t="7805" r="8510" b="7982">taggers</wd>

<space/>

<wd l="8587" t="7781" r="8918" b="7939">that</wd>

<space/>

<wd l="8981" t="7781" r="9686" b="7982">perform</wd>

<space/>

<wd l="9754" t="7834" r="10200" b="7939">more</wd>

<space/>

<wd l="10267" t="7834" r="10502" b="7939">re-</wd>

</ln>

<ln l="6144" t="8054" r="10502" b="8256" baseLine="8203" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6144" t="8054" r="6629" b="8213">liable</wd>

<space/>

<wd l="6710" t="8107" r="6926" b="8213">on</wd>

<space/>

<wd l="7013" t="8054" r="7507" b="8213">social</wd>

<space/>

<wd l="7589" t="8054" r="8126" b="8213">media</wd>

<space/>

<wd l="8198" t="8078" r="8650" b="8242">texts,</wd>

<space/>

<wd l="8750" t="8107" r="9058" b="8213">one</wd>

<space/>

<wd l="9134" t="8054" r="9850" b="8256">possible</wd>

<space/>

<wd l="9926" t="8107" r="10286" b="8256">way</wd>

<space/>

<wd l="10363" t="8054" r="10502" b="8213">is</wd>

<space/>

</ln>

<ln l="6144" t="8323" r="10502" b="8525" baseLine="8477" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6144" t="8347" r="6312" b="8482">to</wd>

<space/>

<wd l="6418" t="8347" r="7166" b="8525">augment</wd>

<space/>

<wd l="7262" t="8323" r="7526" b="8482">the</wd>

<space/>

<wd l="7627" t="8323" r="8304" b="8525">training</wd>

<space/>

<wd l="8410" t="8323" r="8770" b="8482">data</wd>

<space/>

<wd l="8866" t="8323" r="9077" b="8525">by</wd>

<space/>

<wd l="9182" t="8323" r="10003" b="8525">including</wd>

<space/>

<wd l="10104" t="8347" r="10502" b="8482">texts</wd>

<space/>

</ln>

<ln l="6149" t="8597" r="10498" b="8798" baseLine="8746" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="8597" r="7099" b="8798">originating</wd>

<space/>

<wd l="7176" t="8597" r="7603" b="8755">from</wd>

<space/>

<wd l="7690" t="8597" r="8184" b="8755">social</wd>

<space/>

<wd l="8261" t="8597" r="8798" b="8755">media</wd>

<space/>

<wd l="8880" t="8597" r="9931" b="8798">(Derczynski</wd>

<space/>

<wd l="10018" t="8621" r="10171" b="8755">et</wd>

<space/>

<wd l="10248" t="8597" r="10498" b="8784">al.,</wd>

<space/>

</ln>

<ln l="6149" t="8866" r="10502" b="9067" baseLine="9019" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="8870" r="6696" b="9062">2013).</wd>

<space/>

<wd l="6883" t="8866" r="7315" b="9024">Such</wd>

<space/>

<wd l="7406" t="8866" r="8429" b="9067">approaches,</wd>

<space/>

<wd l="8539" t="8866" r="9307" b="9053">however,</wd>

<space/>

<wd l="9418" t="8866" r="10032" b="9067">require</wd>

<space/>

<wd l="10128" t="8918" r="10502" b="9024">con-</wd>

</ln>

<ln l="6154" t="9139" r="10512" b="9341" baseLine="9288" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6154" t="9139" r="6931" b="9298">siderable</wd>

<space/>

<wd l="6984" t="9139" r="7579" b="9298">human</wd>

<space/>

<wd l="7637" t="9139" r="8155" b="9326">effort,</wd>

<space/>

<wd l="8227" t="9192" r="8410" b="9298">so</wd>

<space/>

<wd l="8467" t="9192" r="8779" b="9298">one</wd>

<space/>

<wd l="8832" t="9139" r="9547" b="9341">possible</wd>

<space/>

<wd l="9605" t="9139" r="10512" b="9298">alternative</wd>

<space/>

</ln>

<ln l="6149" t="9408" r="10498" b="9595" baseLine="9562" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="9461" r="6446" b="9566">can</wd>

<space/>

<wd l="6538" t="9408" r="6739" b="9566">be</wd>

<space/>

<wd l="6830" t="9432" r="6998" b="9566">to</wd>

<space/>

<wd l="7090" t="9408" r="7958" b="9566">normalize</wd>

<space/>

<wd l="8050" t="9408" r="8314" b="9566">the</wd>

<space/>

<wd l="8410" t="9408" r="8909" b="9566">social</wd>

<space/>

<wd l="9000" t="9408" r="9538" b="9566">media</wd>

<space/>

<wd l="9619" t="9432" r="10022" b="9566">texts</wd>

<space/>

<wd l="10123" t="9408" r="10498" b="9595">first,</wd>

<space/>

</ln>

<ln l="6144" t="9682" r="10512" b="9883" baseLine="9830" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6144" t="9682" r="6523" b="9840">then</wd>

<space/>

<wd l="6576" t="9682" r="7051" b="9883">apply</wd>

<space/>

<wd l="7114" t="9682" r="7843" b="9840">standard</wd>

<space/>

<wd l="7896" t="9682" r="8707" b="9883">analyzers</wd>

<space/>

<wd l="8770" t="9734" r="8986" b="9840">on</wd>

<space/>

<wd l="9034" t="9682" r="9480" b="9840">these</wd>

<space/>

<wd l="9528" t="9682" r="10512" b="9840">normalized</wd>

<space/>

</ln>

<ln l="6144" t="9950" r="10512" b="10152" baseLine="10104" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6144" t="9974" r="6600" b="10109">texts.</wd>

<space/>

<wd l="6672" t="9950" r="7478" b="10152">Recently,</wd>

<space/>

<wd l="7541" t="10003" r="7637" b="10109">a</wd>

<space/>

<wd l="7680" t="9950" r="8352" b="10109">number</wd>

<space/>

<wd l="8400" t="9950" r="8587" b="10109">of</wd>

<space/>

<wd l="8626" t="9950" r="9595" b="10152">approaches</wd>

<space/>

<wd l="9648" t="9950" r="10051" b="10109">have</wd>

<space/>

<wd l="10099" t="9950" r="10512" b="10109">been</wd>

<space/>

</ln>

<ln l="6144" t="10224" r="10507" b="10426" baseLine="10373" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6144" t="10224" r="6946" b="10426">proposed</wd>

<space/>

<wd l="6998" t="10224" r="7258" b="10382">for</wd>

<space/>

<wd l="7306" t="10224" r="7574" b="10382">the</wd>

<space/>

<wd l="7627" t="10224" r="8203" b="10382">lexical</wd>

<space/>

<wd l="8261" t="10224" r="9470" b="10382">normalization</wd>

<space/>

<wd l="9528" t="10224" r="9720" b="10382">of</wd>

<space/>

<wd l="9758" t="10224" r="10507" b="10382">informal</wd>

<space/>

</ln>

<ln l="6154" t="10493" r="10498" b="10694" baseLine="10642" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6154" t="10493" r="6811" b="10694">(mostly</wd>

<space/>

<wd l="6898" t="10493" r="7397" b="10651">social</wd>

<space/>

<wd l="7478" t="10493" r="8016" b="10651">media</wd>

<space/>

<wd l="8093" t="10493" r="8405" b="10651">and</wd>

<space/>

<wd l="8486" t="10498" r="8981" b="10690">SMS)</wd>

<space/>

<wd l="9067" t="10517" r="9470" b="10651">texts</wd>

<space/>

<wd l="9566" t="10493" r="9926" b="10690">(Liu</wd>

<space/>

<wd l="10013" t="10517" r="10166" b="10651">et</wd>

<space/>

<wd l="10248" t="10493" r="10498" b="10680">al.,</wd>

<space/>

</ln>

<ln l="6149" t="10762" r="10512" b="10963" baseLine="10915" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="10766" r="6629" b="10949">2011;</wd>

<space/>

<wd l="6706" t="10762" r="7003" b="10920">Liu</wd>

<space/>

<wd l="7070" t="10786" r="7224" b="10920">et</wd>

<space/>

<wd l="7286" t="10762" r="7536" b="10949">al.,</wd>

<space/>

<wd l="7613" t="10766" r="8093" b="10949">2012;</wd>

<space/>

<wd l="8165" t="10766" r="8525" b="10920">Han</wd>

<space/>

<wd l="8592" t="10786" r="8746" b="10920">et</wd>

<space/>

<wd l="8808" t="10762" r="9058" b="10949">al.,</wd>

<space/>

<wd l="9134" t="10766" r="9614" b="10949">2013;</wd>

<space/>

<wd l="9691" t="10766" r="10133" b="10963">Yang</wd>

<space/>

<wd l="10200" t="10762" r="10512" b="10920">and</wd>

<space/>

</ln>

<ln l="6144" t="11035" r="7704" b="11232" baseLine="11184" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6144" t="11035" r="7085" b="11222">Eisenstein,</wd>

<space/>

<wd l="7157" t="11040" r="7704" b="11232">2013).</wd>

</ln>

</para>

<para l="6144" t="11309" r="10512" b="13906" alignment="justified" spaceBefore="7" fli="216" lsp="exactly" lspExact="271" language="en">

<ln l="6365" t="11309" r="10502" b="11510" baseLine="11458" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6365" t="11314" r="6725" b="11467">Han</wd>

<space/>

<wd l="6797" t="11309" r="7104" b="11467">and</wd>

<space/>

<wd l="7166" t="11309" r="7910" b="11467">Baldwin</wd>

<space/>

<wd l="7978" t="11314" r="8539" b="11506">(2011)</wd>

<space/>

<wd l="8616" t="11309" r="8952" b="11510">rely</wd>

<space/>

<wd l="9019" t="11362" r="9235" b="11467">on</wd>

<space/>

<wd l="9298" t="11309" r="9562" b="11467">the</wd>

<space/>

<wd l="9624" t="11309" r="10502" b="11467">identifica-</wd>

</ln>

<ln l="6144" t="11582" r="10502" b="11784" baseLine="11731" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="11582" r="6485" b="11741">tion</wd>

<space/>

<wd l="6552" t="11582" r="6744" b="11741">of</wd>

<space/>

<wd l="6792" t="11582" r="7061" b="11741">the</wd>

<space/>

<wd l="7123" t="11582" r="7646" b="11741">words</wd>

<space/>

<wd l="7718" t="11582" r="8045" b="11741">that</wd>

<space/>

<wd l="8107" t="11582" r="8726" b="11784">require</wd>

<space/>

<wd l="8794" t="11582" r="9715" b="11770">correction,</wd>

<space/>

<wd l="9792" t="11582" r="10171" b="11741">then</wd>

<space/>

<wd l="10238" t="11582" r="10502" b="11741">de-</wd>

</ln>

<ln l="6149" t="11851" r="10507" b="12053" baseLine="12000" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="11851" r="6470" b="12010">fine</wd>

<space/>

<wd l="6562" t="11904" r="6658" b="12010">a</wd>

<space/>

<wd l="6739" t="11851" r="7598" b="12010">confusion</wd>

<space/>

<wd l="7690" t="11875" r="7925" b="12010">set</wd>

<space/>

<wd l="8011" t="11851" r="8923" b="12053">containing</wd>

<space/>

<wd l="9010" t="11851" r="9278" b="12010">the</wd>

<space/>

<wd l="9365" t="11851" r="10195" b="12010">candidate</wd>

<space/>

<wd l="10282" t="11856" r="10507" b="12010">IV</wd>

<space/>

</ln>

<ln l="6149" t="12120" r="10502" b="12322" baseLine="12274" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="12120" r="7032" b="12278">correction</wd>

<space/>

<wd l="7099" t="12120" r="7598" b="12278">forms</wd>

<space/>

<wd l="7675" t="12120" r="7934" b="12278">for</wd>

<space/>

<wd l="8006" t="12120" r="8400" b="12278">such</wd>

<space/>

<wd l="8462" t="12120" r="9038" b="12278">words.</wd>

<space/>

<wd l="9158" t="12120" r="9802" b="12322">Finally,</wd>

<space/>

<wd l="9893" t="12173" r="9989" b="12278">a</wd>

<space/>

<wd l="10051" t="12120" r="10502" b="12278">rank-</wd>

</ln>

<ln l="6144" t="12394" r="10502" b="12595" baseLine="12542" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="12394" r="6422" b="12595">ing</wd>

<space/>

<wd l="6523" t="12394" r="7210" b="12581">scheme,</wd>

<space/>

<wd l="7315" t="12394" r="7858" b="12595">taking</wd>

<space/>

<wd l="7949" t="12394" r="8674" b="12595">multiple</wd>

<space/>

<wd l="8765" t="12394" r="9346" b="12552">factors</wd>

<space/>

<wd l="9442" t="12394" r="9778" b="12552">into</wd>

<space/>

<wd l="9874" t="12394" r="10502" b="12552">consid-</wd>

</ln>

<ln l="6149" t="12662" r="10507" b="12864" baseLine="12816" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="12662" r="6797" b="12850">eration,</wd>

<space/>

<wd l="6902" t="12662" r="7037" b="12821">is</wd>

<space/>

<wd l="7138" t="12662" r="7776" b="12864">applied</wd>

<space/>

<wd l="7862" t="12662" r="8395" b="12821">which</wd>

<space/>

<wd l="8491" t="12662" r="9058" b="12821">selects</wd>

<space/>

<wd l="9154" t="12662" r="9418" b="12821">the</wd>

<space/>

<wd l="9504" t="12686" r="9931" b="12821">most</wd>

<space/>

<wd l="10018" t="12662" r="10507" b="12864">likely</wd>

<space/>

</ln>

<ln l="6149" t="12936" r="10512" b="13138" baseLine="13085" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="12936" r="7032" b="13094">correction</wd>

<space/>

<wd l="7099" t="12936" r="7354" b="13094">for</wd>

<space/>

<wd l="7426" t="12989" r="7622" b="13094">an</wd>

<space/>

<wd l="7694" t="12941" r="8150" b="13094">OOV</wd>

<space/>

<wd l="8222" t="12936" r="8707" b="13094">word.</wd>

<space/>

<wd l="8827" t="12941" r="9010" b="13090">In</wd>

<space/>

<wd l="9077" t="12936" r="9480" b="13094">their</wd>

<space/>

<wd l="9552" t="12936" r="10512" b="13138">subsequent</wd>

<space/>

</ln>

<ln l="6144" t="13205" r="10512" b="13406" baseLine="13358" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="13205" r="6634" b="13392">work,</wd>

<space/>

<wd l="6763" t="13210" r="7123" b="13363">Han</wd>

<space/>

<wd l="7238" t="13229" r="7392" b="13363">et</wd>

<space/>

<wd l="7498" t="13205" r="7694" b="13363">al.</wd>

<space/>

<wd l="7819" t="13210" r="8381" b="13402">(2012)</wd>

<space/>

<wd l="8496" t="13258" r="9187" b="13406">propose</wd>

<space/>

<wd l="9298" t="13258" r="9494" b="13363">an</wd>

<space/>

<wd l="9605" t="13205" r="10512" b="13363">automated</wd>

<space/>

</ln>

<ln l="6144" t="13478" r="10502" b="13637" baseLine="13627" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="13478" r="6802" b="13637">method</wd>

<space/>

<wd l="6845" t="13502" r="7013" b="13637">to</wd>

<space/>

<wd l="7066" t="13502" r="7862" b="13637">construct</wd>

<space/>

<wd l="7915" t="13502" r="8635" b="13637">accurate</wd>

<space/>

<wd l="8683" t="13478" r="9898" b="13637">normalization</wd>

<space/>

<wd l="9946" t="13478" r="10502" b="13637">dictio-</wd>

</ln>

<ln l="6144" t="13747" r="6710" b="13906" baseLine="13901" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="13747" r="6710" b="13906">naries.</wd>

</ln>

</para>

<para l="6144" t="14021" r="10526" b="15307" alignment="justified" spaceBefore="3" fli="216" lsp="exactly" lspExact="269" language="en">

<ln l="6365" t="14021" r="10507" b="14222" baseLine="14174" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6365" t="14021" r="6662" b="14179">Liu</wd>

<space/>

<wd l="6720" t="14045" r="6874" b="14179">et</wd>

<space/>

<wd l="6922" t="14021" r="7118" b="14179">al.</wd>

<space/>

<wd l="7186" t="14026" r="7733" b="14218">(2011;</wd>

<space/>

<wd l="7800" t="14026" r="8290" b="14218">2012)</wd>

<space/>

<wd l="8347" t="14074" r="9038" b="14222">propose</wd>

<space/>

<wd l="9091" t="14074" r="9187" b="14179">a</wd>

<space/>

<wd l="9230" t="14021" r="10507" b="14179">character-level</wd>

<space/>

</ln>

<ln l="6154" t="14294" r="10502" b="14496" baseLine="14443" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6154" t="14347" r="6946" b="14496">sequence</wd>

<space/>

<wd l="7051" t="14294" r="7594" b="14453">model</wd>

<space/>

<wd l="7709" t="14318" r="7872" b="14453">to</wd>

<space/>

<wd l="7982" t="14294" r="8592" b="14496">predict</wd>

<space/>

<wd l="8698" t="14294" r="9590" b="14482">insertions,</wd>

<space/>

<wd l="9730" t="14294" r="10502" b="14453">deletions</wd>

<space/>

</ln>

<ln l="6149" t="14563" r="10526" b="14765" baseLine="14717" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6149" t="14563" r="6461" b="14722">and</wd>

<space/>

<wd l="6538" t="14563" r="7675" b="14722">substitutions.</wd>

<space/>

<wd l="7790" t="14563" r="8232" b="14765">They</wd>

<space/>

<wd l="8309" t="14563" r="8645" b="14722">first</wd>

<space/>

<wd l="8712" t="14563" r="9293" b="14722">collect</wd>

<space/>

<wd l="9360" t="14616" r="9456" b="14722">a</wd>

<space/>

<wd l="9523" t="14563" r="9950" b="14765">large</wd>

<space/>

<wd l="10027" t="14587" r="10262" b="14722">set</wd>

<space/>

<wd l="10334" t="14563" r="10526" b="14722">of</wd>

<space/>

</ln>

<ln l="6144" t="14837" r="10498" b="15038" baseLine="14986">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4"><wd l="6144" t="14837" r="6614" b="15038">noisy</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="modern" fontPitch="fixed" spacing="-4"><wd l="6763" t="14856" r="7301" b="15024">(OOV,</wd>

<space/>

<wd l="7507" t="14856" r="7819" b="15019">IV)</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4"><wd l="7958" t="14837" r="8630" b="15038">training</wd>

<space/>

<wd l="8717" t="14837" r="9130" b="15038">pairs</wd>

<space/>

<wd l="9221" t="14837" r="9648" b="14995">from</wd>

<space/>

<wd l="9725" t="14837" r="9989" b="14995">the</wd>

<space/>

<wd l="10070" t="14837" r="10498" b="14995">Web.</wd>

<space/>

</run>

</ln>

<ln l="6144" t="15106" r="10507" b="15307" baseLine="15259" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6144" t="15106" r="6667" b="15264">These</wd>

<space/>

<wd l="6730" t="15106" r="7147" b="15307">pairs</wd>

<space/>

<wd l="7219" t="15158" r="7478" b="15264">are</wd>

<space/>

<wd l="7546" t="15106" r="7920" b="15264">then</wd>

<space/>

<wd l="7987" t="15106" r="8626" b="15307">aligned</wd>

<space/>

<wd l="8693" t="15130" r="8846" b="15264">at</wd>

<space/>

<wd l="8904" t="15106" r="9173" b="15264">the</wd>

<space/>

<wd l="9240" t="15106" r="10037" b="15264">character</wd>

<space/>

<wd l="10094" t="15106" r="10507" b="15264">level</wd>

</ln>

</para>

</column>

</section>

<section l="1433" t="15316" r="10531" b="16480">

<column l="1433" t="15316" r="10531" b="16480">

<para l="5771" t="15792" r="6200" b="15946" alignment="centered" spaceBefore="405" lsp="exactly" lspExact="271" language="en">

<ln l="5837" t="15792" r="6134" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="24">

<wd l="5837" t="15792" r="6134" b="15946">120</wd>

</ln>

</para>

<para l="2827" t="16133" r="9072" b="16469" alignment="centered" spaceBefore="140" lsp="exactly" lspExact="170" language="en">

<ln l="2827" t="16133" r="9072" b="16301" baseLine="16253" forcedEOF="true">

<run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2827" t="16133" r="3710" b="16296">Proceedings</wd>

<space/>

<wd l="3763" t="16133" r="3926" b="16296">of</wd>

<space/>

<wd l="3950" t="16133" r="4162" b="16262">the</wd>

<space/>

<wd l="4200" t="16138" r="4531" b="16262">ACL</wd>

<space/>

<wd l="4574" t="16133" r="4934" b="16262">2015</wd>

<space/>

<wd l="4987" t="16133" r="5688" b="16296">Workshop</wd>

<space/>

<wd l="5741" t="16176" r="5914" b="16262">on</wd>

<space/>

<wd l="5957" t="16138" r="6365" b="16296">Noisy</wd>

<space/>

<wd l="6427" t="16133" r="7536" b="16296">User-generated</wd>

<space/>

</run>

<wd l="7584" t="16138" r="7901" b="16286"><run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Text</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="7954" t="16171" r="8352" b="16301">pages</wd>

<space/>

<wd l="8424" t="16133" r="9072" b="16286">120–125,
</wd>

</run>

</ln>

<ln l="3029" t="16296" r="8870" b="16469" baseLine="16425" forcedEOF="true">

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3029" t="16301" r="3595" b="16469">Beijing,</wd>

<space/>

<wd l="3653" t="16301" r="4114" b="16454">China,</wd>

<space/>

<wd l="4166" t="16301" r="4459" b="16469">July</wd>

<space/>

<wd l="4512" t="16301" r="4723" b="16454">31,</wd>

<space/>

<wd l="4781" t="16301" r="5170" b="16430">2015.</wd>

<space/>

</run>

<wd l="5246" t="16296" r="5770" b="16469"><run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">©</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">2015</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="5822" t="16301" r="6672" b="16430">Association</wd>

<space/>

<wd l="6715" t="16301" r="6926" b="16430">for</wd>

<space/>

<wd l="6974" t="16301" r="8035" b="16469">Computational</wd>

<space/>

<wd l="8078" t="16301" r="8870" b="16469">Linguistics</wd>

</run>

</ln>

</para>

</column>

</section>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4318.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1440" marginTop="1205" marginRight="1385" marginBottom="1292" offsetX="-22" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1440" t="1205" r="10524" b="15317">

<column l="1440" t="1205" r="5822" b="15317">

<para l="1440" t="1320" r="5794" b="2064" alignment="justified" spaceBefore="53" lsp="exactly" lspExact="271" language="en">

<ln l="1445" t="1320" r="5794" b="1522" baseLine="1474" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="1320" r="1757" b="1478">and</wd>

<space/>

<wd l="1814" t="1320" r="2587" b="1522">provided</wd>

<space/>

<wd l="2650" t="1373" r="2818" b="1478">as</wd>

<space/>

<wd l="2885" t="1320" r="3562" b="1522">training</wd>

<space/>

<wd l="3629" t="1320" r="3994" b="1478">data</wd>

<space/>

<wd l="4046" t="1320" r="4306" b="1478">for</wd>

<space/>

<wd l="4363" t="1373" r="4459" b="1478">a</wd>

<space/>

<wd l="4522" t="1325" r="4925" b="1478">CRF</wd>

<space/>

<wd l="4992" t="1320" r="5794" b="1478">classifier.</wd>

<space/>

</ln>

<ln l="1440" t="1594" r="5794" b="1781" baseLine="1742" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="1594" r="1781" b="1752">The</wd>

<space/>

<wd l="1834" t="1594" r="2462" b="1752">authors</wd>

<space/>

<wd l="2530" t="1594" r="2870" b="1752">also</wd>

<space/>

<wd l="2923" t="1594" r="3643" b="1752">released</wd>

<space/>

<wd l="3691" t="1594" r="4094" b="1752">their</wd>

<space/>

<wd l="4147" t="1594" r="5395" b="1781">3,802-element</wd>

<space/>

<wd l="5448" t="1646" r="5794" b="1752">nor-</wd>

</ln>

<ln l="1440" t="1862" r="5717" b="2064" baseLine="2016" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="1862" r="2362" b="2021">malization</wd>

<space/>

<wd l="2419" t="1862" r="3298" b="2064">dictionary</wd>

<space/>

<wd l="3355" t="1862" r="3686" b="2021">that</wd>

<space/>

<wd l="3744" t="1915" r="4032" b="2021">our</wd>

<space/>

<wd l="4080" t="1862" r="4531" b="2021">work</wd>

<space/>

<wd l="4589" t="1862" r="4930" b="2021">also</wd>

<space/>

<wd l="4987" t="1862" r="5453" b="2021">relies</wd>

<space/>

<wd l="5520" t="1886" r="5717" b="2021">at.</wd>

</ln>

</para>

<para l="1440" t="2141" r="5808" b="5592" alignment="justified" spaceBefore="5" fli="216" lsp="exactly" lspExact="271" language="en">

<ln l="1661" t="2141" r="5794" b="2342" baseLine="2290" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1661" t="2146" r="2107" b="2342">Yang</wd>

<space/>

<wd l="2179" t="2141" r="2491" b="2299">and</wd>

<space/>

<wd l="2554" t="2141" r="3451" b="2299">Eisenstein</wd>

<space/>

<wd l="3523" t="2146" r="4085" b="2338">(2013)</wd>

<space/>

<wd l="4162" t="2141" r="4982" b="2299">introduce</wd>

<space/>

<wd l="5054" t="2194" r="5251" b="2299">an</wd>

<space/>

<wd l="5318" t="2194" r="5794" b="2299">unsu-</wd>

</ln>

<ln l="1440" t="2410" r="5794" b="2611" baseLine="2563" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="2410" r="2179" b="2611">pervised</wd>

<space/>

<wd l="2242" t="2410" r="3096" b="2611">log-linear</wd>

<space/>

<wd l="3154" t="2410" r="3696" b="2568">model</wd>

<space/>

<wd l="3758" t="2410" r="4018" b="2568">for</wd>

<space/>

<wd l="4075" t="2410" r="4339" b="2568">the</wd>

<space/>

<wd l="4402" t="2410" r="4757" b="2568">task</wd>

<space/>

<wd l="4824" t="2410" r="5011" b="2568">of</wd>

<space/>

<wd l="5059" t="2434" r="5386" b="2568">text</wd>

<space/>

<wd l="5448" t="2462" r="5794" b="2568">nor-</wd>

</ln>

<ln l="1440" t="2683" r="5794" b="2842" baseLine="2832" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="2683" r="2405" b="2842">malization.</wd>

<space/>

<wd l="2563" t="2683" r="3235" b="2842">Besides</wd>

<space/>

<wd l="3326" t="2683" r="3590" b="2842">the</wd>

<space/>

<wd l="3672" t="2683" r="4358" b="2842">features</wd>

<space/>

<wd l="4445" t="2683" r="4776" b="2842">that</wd>

<space/>

<wd l="4858" t="2736" r="5155" b="2842">can</wd>

<space/>

<wd l="5242" t="2683" r="5443" b="2842">be</wd>

<space/>

<wd l="5530" t="2683" r="5794" b="2842">de-</wd>

</ln>

<ln l="1440" t="2952" r="5794" b="3154" baseLine="3101" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="2952" r="1882" b="3110">rived</wd>

<space/>

<wd l="1939" t="2952" r="2366" b="3110">from</wd>

<space/>

<wd l="2419" t="2952" r="2837" b="3154">pairs</wd>

<space/>

<wd l="2909" t="2952" r="3101" b="3110">of</wd>

<space/>

<wd l="3144" t="2952" r="3667" b="3110">words</wd>

<space/>

<wd l="3744" t="2957" r="4109" b="3154">(e.g.</wd>

<space/>

<wd l="4186" t="2952" r="4507" b="3110">edit</wd>

<space/>

<wd l="4570" t="2952" r="5395" b="3149">distance),</wd>

<space/>

<wd l="5467" t="2952" r="5794" b="3110">fea-</wd>

</ln>

<ln l="1440" t="3226" r="5808" b="3427" baseLine="3374" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="3250" r="1858" b="3384">tures</wd>

<space/>

<wd l="1920" t="3226" r="2933" b="3427">considering</wd>

<space/>

<wd l="2990" t="3226" r="3254" b="3384">the</wd>

<space/>

<wd l="3312" t="3250" r="3950" b="3384">context</wd>

<space/>

<wd l="4003" t="3278" r="4267" b="3384">are</wd>

<space/>

<wd l="4325" t="3226" r="4666" b="3384">also</wd>

<space/>

<wd l="4728" t="3226" r="5582" b="3427">employed</wd>

<space/>

<wd l="5635" t="3226" r="5808" b="3379">in</wd>

<space/>

</ln>

<ln l="1440" t="3494" r="5808" b="3653" baseLine="3643" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="3494" r="1843" b="3653">their</wd>

<space/>

<wd l="1901" t="3494" r="2491" b="3653">model.</wd>

<space/>

<wd l="2587" t="3499" r="2822" b="3653">As</wd>

<space/>

<wd l="2894" t="3494" r="3158" b="3653">the</wd>

<space/>

<wd l="3221" t="3494" r="3893" b="3653">number</wd>

<space/>

<wd l="3955" t="3494" r="4142" b="3653">of</wd>

<space/>

<wd l="4195" t="3494" r="4608" b="3653">class</wd>

<space/>

<wd l="4675" t="3494" r="5179" b="3653">labels</wd>

<space/>

<wd l="5246" t="3494" r="5419" b="3648">in</wd>

<space/>

<wd l="5477" t="3494" r="5808" b="3653">that</wd>

<space/>

</ln>

<ln l="1440" t="3763" r="5803" b="3965" baseLine="3917" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="3763" r="1982" b="3922">model</wd>

<space/>

<wd l="2035" t="3763" r="2174" b="3922">is</wd>

<space/>

<wd l="2237" t="3763" r="2702" b="3965">equal</wd>

<space/>

<wd l="2755" t="3787" r="2918" b="3922">to</wd>

<space/>

<wd l="2971" t="3763" r="3240" b="3922">the</wd>

<space/>

<wd l="3298" t="3763" r="3629" b="3922">size</wd>

<space/>

<wd l="3682" t="3763" r="3874" b="3922">of</wd>

<space/>

<wd l="3912" t="3763" r="4176" b="3922">the</wd>

<space/>

<wd l="4229" t="3768" r="4454" b="3922">IV</wd>

<space/>

<wd l="4507" t="3763" r="5030" b="3922">words</wd>

<space/>

<wd l="5093" t="3816" r="5290" b="3922">an</wd>

<space/>

<wd l="5347" t="3768" r="5803" b="3922">OOV</wd>

<space/>

</ln>

<ln l="1440" t="4037" r="5808" b="4238" baseLine="4186" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="4037" r="1886" b="4195">word</wd>

<space/>

<wd l="1958" t="4037" r="2438" b="4195">could</wd>

<space/>

<wd l="2506" t="4037" r="3226" b="4238">possibly</wd>

<space/>

<wd l="3298" t="4037" r="3504" b="4195">be</wd>

<space/>

<wd l="3576" t="4037" r="4382" b="4195">corrected</wd>

<space/>

<wd l="4450" t="4061" r="4613" b="4195">to</wd>

<space/>

<wd l="4694" t="4037" r="5515" b="4238">(typically</wd>

<space/>

<wd l="5592" t="4090" r="5808" b="4195">on</wd>

<space/>

</ln>

<ln l="1440" t="4272" r="5794" b="4507" baseLine="4455">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="1440" t="4306" r="1704" b="4464">the</wd>

<space/>

<wd l="1771" t="4306" r="2232" b="4464">order</wd>

<space/>

<wd l="2290" t="4306" r="2482" b="4464">of</wd>

<space/>

</run>

<wd l="2554" t="4272" r="3259" b="4493"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">10</run>

<run underlined="none" subsuperscript="superscript" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">4</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">-10</run>

<run underlined="none" subsuperscript="superscript" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">5</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><space/>

<wd l="3331" t="4306" r="3864" b="4464">which</wd>

<space/>

<wd l="3926" t="4306" r="4061" b="4464">is</wd>

<space/>

<wd l="4128" t="4306" r="4373" b="4464">far</wd>

<space/>

<wd l="4430" t="4306" r="5069" b="4507">beyond</wd>

<space/>

<wd l="5126" t="4306" r="5395" b="4464">the</wd>

<space/>

<wd l="5453" t="4330" r="5794" b="4507">typ-</wd>

</run>

</ln>

<ln l="1440" t="4579" r="5798" b="4776" baseLine="4728" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="4579" r="1752" b="4738">ical</wd>

<space/>

<wd l="1824" t="4579" r="2246" b="4738">label</wd>

<space/>

<wd l="2328" t="4579" r="2659" b="4738">size</wd>

<space/>

<wd l="2731" t="4579" r="2923" b="4738">of</wd>

<space/>

<wd l="2986" t="4579" r="4118" b="4738">classification</wd>

<space/>

<wd l="4190" t="4579" r="4742" b="4776">tasks),</wd>

<space/>

<wd l="4829" t="4579" r="5093" b="4738">the</wd>

<space/>

<wd l="5170" t="4579" r="5798" b="4738">authors</wd>

<space/>

</ln>

<ln l="1440" t="4848" r="5794" b="5050" baseLine="5002" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="4901" r="2131" b="5050">propose</wd>

<space/>

<wd l="2198" t="4848" r="2467" b="5006">the</wd>

<space/>

<wd l="2534" t="4901" r="2827" b="5006">use</wd>

<space/>

<wd l="2904" t="4848" r="3091" b="5006">of</wd>

<space/>

<wd l="3154" t="4848" r="4066" b="5050">Sequential</wd>

<space/>

<wd l="4138" t="4853" r="4709" b="5006">Monte</wd>

<space/>

<wd l="4786" t="4848" r="5261" b="5006">Carlo</wd>

<space/>

<wd l="5333" t="4848" r="5794" b="5006">train-</wd>

</ln>

<ln l="1440" t="5122" r="5803" b="5323" baseLine="5270" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="5122" r="1714" b="5323">ing</wd>

<space/>

<wd l="1800" t="5122" r="2597" b="5323">approach</wd>

<space/>

<wd l="2669" t="5122" r="2928" b="5280">for</wd>

<space/>

<wd l="3000" t="5122" r="3710" b="5323">learning</wd>

<space/>

<wd l="3787" t="5122" r="4056" b="5280">the</wd>

<space/>

<wd l="4138" t="5122" r="5122" b="5323">appropriate</wd>

<space/>

<wd l="5198" t="5122" r="5803" b="5280">feature</wd>

<space/>

</ln>

<ln l="1440" t="5390" r="2160" b="5592" baseLine="5544" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="5390" r="2160" b="5592">weights.</wd>

</ln>

</para>

<para l="1440" t="5866" r="5333" b="6038" alignment="left" spaceBefore="224" lsp="exactly" lspExact="274" language="en">

<ln l="1440" t="5866" r="5333" b="6038" baseLine="6034" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="3">

<wd l="1440" t="5866" r="1555" b="6038">3</wd>

<space/>

<wd l="1805" t="5870" r="2198" b="6038">The</wd>

<space/>

<wd l="2261" t="5870" r="2741" b="6038">Task</wd>

<space/>

<wd l="2803" t="5866" r="3014" b="6038">of</wd>

<space/>

<wd l="3058" t="5866" r="3797" b="6038">Lexical</wd>

<space/>

<wd l="3864" t="5866" r="5333" b="6038">Normalization</wd>

</ln>

</para>

<para l="1440" t="6307" r="5808" b="10027" alignment="justified" spaceBefore="148" lsp="exactly" lspExact="271" language="en">

<ln l="1440" t="6307" r="5808" b="6509" baseLine="6461">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1440" t="6307" r="2266" b="6509">Formally,</wd>

<space/>

<wd l="2357" t="6307" r="2832" b="6509">given</wd>

<space/>

<wd l="2909" t="6360" r="3106" b="6466">an</wd>

<space/>

</run>

<wd l="3187" t="6307" r="3830" b="6509"><run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">m</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">-long</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="3917" t="6360" r="4704" b="6509">sequence</wd>

<space/>

<wd l="4781" t="6307" r="4973" b="6466">of</wd>

<space/>

<wd l="5030" t="6307" r="5554" b="6466">words</wd>

<space/>

<wd l="5635" t="6307" r="5808" b="6461">in</wd>

<space/>

</run>

</ln>

<ln l="1440" t="6533" r="5794" b="6802" baseLine="6732">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1440" t="6581" r="1704" b="6739">the</wd>

<space/>

</run>

<wd l="1805" t="6538" r="2035" b="6739"><run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">i</run>

<run italic="true" underlined="none" subsuperscript="superscript" fontSize="1150" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">th</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="2141" t="6605" r="2659" b="6768">tweet,</wd>

<space/>

</run>

<wd l="2779" t="6586" r="2957" b="6773"><run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">T</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">i</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2"><space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="3115" t="6653" r="3264" b="6706">=</wd>

<space/>

</run>

<wd l="3437" t="6566" r="3797" b="6802"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">[</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">i,</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="750" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

<wd l="3854" t="6595" r="4176" b="6802"><run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">i,</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="750" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">2</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="4238" t="6710" r="4267" b="6734">.</wd>

<space/>

<wd l="4334" t="6710" r="4363" b="6734">.</wd>

<space/>

<wd l="4430" t="6710" r="4459" b="6734">.</wd>

<space/>

<wd l="4526" t="6710" r="4560" b="6778">,</wd>

<space/>

</run>

<wd l="4618" t="6566" r="5059" b="6802"><run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">i,m</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">]</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="5174" t="6581" r="5794" b="6782">partici-</wd>

</run>

</ln>

<ln l="1440" t="6850" r="5803" b="7051" baseLine="7003" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="6874" r="1891" b="7051">pants</wd>

<space/>

<wd l="1978" t="6850" r="2165" b="7008">of</wd>

<space/>

<wd l="2222" t="6850" r="2491" b="7008">the</wd>

<space/>

<wd l="2568" t="6850" r="3130" b="7008">shared</wd>

<space/>

<wd l="3202" t="6850" r="3552" b="7008">task</wd>

<space/>

<wd l="3624" t="6850" r="3936" b="7008">had</wd>

<space/>

<wd l="4008" t="6874" r="4171" b="7008">to</wd>

<space/>

<wd l="4248" t="6874" r="4771" b="7008">return</wd>

<space/>

<wd l="4843" t="6902" r="4939" b="7008">a</wd>

<space/>

<wd l="5016" t="6902" r="5803" b="7051">sequence</wd>

<space/>

</ln>

<ln l="1445" t="7118" r="5794" b="7325" baseLine="7273">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1445" t="7123" r="1637" b="7282">of</wd>

<space/>

<wd l="1680" t="7123" r="2659" b="7282">normalized</wd>

<space/>

<wd l="2717" t="7123" r="3917" b="7325">in-vocabulary</wd>

<space/>

<wd l="3984" t="7128" r="4339" b="7320">(IV)</wd>

<space/>

<wd l="4406" t="7123" r="4982" b="7310">words,</wd>

<space/>

<wd l="5050" t="7123" r="5304" b="7282">i.e.</wd>

<space/>

</run>

<wd l="5381" t="7118" r="5558" b="7315"><run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">S</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">i</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2"><space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="5645" t="7195" r="5794" b="7248">=</wd>

<space/>

</run>

</ln>

<ln l="1464" t="7378" r="5808" b="7613" baseLine="7551">

<wd l="1464" t="7378" r="1848" b="7613"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">[</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">s</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">i,</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="750" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

<wd l="1910" t="7445" r="2251" b="7613"><run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">s</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">i,</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="750" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">2</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="2314" t="7522" r="2342" b="7546">.</wd>

<space/>

<wd l="2410" t="7522" r="2438" b="7546">.</wd>

<space/>

<wd l="2506" t="7522" r="2534" b="7546">.</wd>

<space/>

<wd l="2602" t="7522" r="2635" b="7589">,</wd>

<space/>

</run>

<wd l="2698" t="7378" r="3158" b="7613"><run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">s</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">i,m</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">]</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="3245" t="7392" r="3581" b="7550">The</wd>

<space/>

<wd l="3638" t="7392" r="4315" b="7594">training</wd>

<space/>

<wd l="4382" t="7416" r="4618" b="7550">set</wd>

<space/>

<wd l="4680" t="7392" r="4867" b="7550">of</wd>

<space/>

<wd l="4910" t="7392" r="5179" b="7550">the</wd>

<space/>

<wd l="5246" t="7392" r="5808" b="7550">shared</wd>

<space/>

</run>

</ln>

<ln l="1440" t="7666" r="5794" b="7867" baseLine="7814" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="7666" r="1795" b="7824">task</wd>

<space/>

<wd l="1862" t="7666" r="2669" b="7824">consisted</wd>

<space/>

<wd l="2741" t="7666" r="2928" b="7824">of</wd>

<space/>

<wd l="2986" t="7666" r="3466" b="7853">2,950</wd>

<space/>

<wd l="3538" t="7690" r="4085" b="7824">tweets</wd>

<space/>

<wd l="4162" t="7666" r="5136" b="7867">comprising</wd>

<space/>

<wd l="5203" t="7666" r="5794" b="7853">44,385</wd>

<space/>

</ln>

<ln l="1440" t="7934" r="5808" b="8122" baseLine="8088" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="7934" r="2050" b="8122">tokens,</wd>

<space/>

<wd l="2136" t="7934" r="2616" b="8093">while</wd>

<space/>

<wd l="2688" t="7934" r="2952" b="8093">the</wd>

<space/>

<wd l="3024" t="7958" r="3326" b="8093">test</wd>

<space/>

<wd l="3403" t="7958" r="3638" b="8093">set</wd>

<space/>

<wd l="3706" t="7934" r="4022" b="8093">had</wd>

<space/>

<wd l="4114" t="7934" r="4570" b="8122">1,967</wd>

<space/>

<wd l="4646" t="7958" r="5194" b="8093">tweets</wd>

<space/>

<wd l="5270" t="7934" r="5808" b="8093">which</wd>

<space/>

</ln>

<ln l="1440" t="8208" r="5803" b="8410" baseLine="8357" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="8208" r="2194" b="8366">included</wd>

<space/>

<wd l="2275" t="8261" r="2371" b="8366">a</wd>

<space/>

<wd l="2453" t="8208" r="2837" b="8366">total</wd>

<space/>

<wd l="2923" t="8208" r="3115" b="8366">of</wd>

<space/>

<wd l="3187" t="8213" r="3758" b="8395">29,421</wd>

<space/>

<wd l="3864" t="8208" r="4474" b="8366">tokens.</wd>

<space/>

<wd l="4632" t="8208" r="5549" b="8410">According</wd>

<space/>

<wd l="5635" t="8232" r="5803" b="8366">to</wd>

<space/>

</ln>

<ln l="1440" t="8477" r="5803" b="8678" baseLine="8630" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="8477" r="1704" b="8635">the</wd>

<space/>

<wd l="1771" t="8477" r="2414" b="8664">dataset,</wd>

<space/>

<wd l="2491" t="8501" r="2918" b="8635">most</wd>

<space/>

<wd l="2981" t="8477" r="3173" b="8635">of</wd>

<space/>

<wd l="3216" t="8477" r="3485" b="8635">the</wd>

<space/>

<wd l="3542" t="8477" r="4066" b="8635">words</wd>

<space/>

<wd l="4142" t="8477" r="4416" b="8635">did</wd>

<space/>

<wd l="4474" t="8501" r="4757" b="8635">not</wd>

<space/>

<wd l="4814" t="8477" r="5434" b="8678">require</wd>

<space/>

<wd l="5501" t="8530" r="5803" b="8678">any</wd>

<space/>

</ln>

<ln l="1440" t="8750" r="5794" b="8952" baseLine="8899" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="8750" r="1829" b="8909">kind</wd>

<space/>

<wd l="1891" t="8750" r="2078" b="8909">of</wd>

<space/>

<wd l="2126" t="8750" r="3134" b="8938">corrections,</wd>

<space/>

<wd l="3202" t="8750" r="3461" b="8909">i.e.</wd>

<space/>

<wd l="3528" t="8750" r="3792" b="8909">the</wd>

<space/>

<wd l="3850" t="8750" r="4771" b="8952">proportion</wd>

<space/>

<wd l="4834" t="8750" r="5026" b="8909">of</wd>

<space/>

<wd l="5064" t="8750" r="5794" b="8909">unmodi-</wd>

</ln>

<ln l="1445" t="9019" r="5794" b="9178" baseLine="9168" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="9019" r="1766" b="9178">fied</wd>

<space/>

<wd l="1834" t="9019" r="2357" b="9178">words</wd>

<space/>

<wd l="2429" t="9072" r="2755" b="9178">was</wd>

<space/>

<wd l="2832" t="9024" r="3494" b="9178">91.12%</wd>

<space/>

<wd l="3571" t="9019" r="3883" b="9178">and</wd>

<space/>

<wd l="3950" t="9019" r="4608" b="9178">90.57%</wd>

<space/>

<wd l="4685" t="9019" r="4939" b="9178">for</wd>

<space/>

<wd l="5002" t="9019" r="5266" b="9178">the</wd>

<space/>

<wd l="5333" t="9019" r="5794" b="9178">train-</wd>

</ln>

<ln l="1440" t="9293" r="5808" b="9494" baseLine="9442" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="9293" r="1714" b="9494">ing</wd>

<space/>

<wd l="1790" t="9293" r="2098" b="9451">and</wd>

<space/>

<wd l="2165" t="9317" r="2467" b="9451">test</wd>

<space/>

<wd l="2539" t="9317" r="2818" b="9480">set,</wd>

<space/>

<wd l="2894" t="9293" r="3970" b="9494">respectively.</wd>

<space/>

<wd l="4075" t="9293" r="4723" b="9451">Further</wd>

<space/>

<wd l="4790" t="9293" r="5347" b="9451">details</wd>

<space/>

<wd l="5419" t="9293" r="5808" b="9451">with</wd>

<space/>

</ln>

<ln l="1440" t="9562" r="5808" b="9763" baseLine="9710" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="9586" r="2059" b="9763">respect</wd>

<space/>

<wd l="2126" t="9562" r="2390" b="9720">the</wd>

<space/>

<wd l="2472" t="9562" r="3034" b="9720">shared</wd>

<space/>

<wd l="3101" t="9562" r="3456" b="9720">task</wd>

<space/>

<wd l="3528" t="9614" r="3821" b="9720">can</wd>

<space/>

<wd l="3893" t="9562" r="4099" b="9720">be</wd>

<space/>

<wd l="4166" t="9562" r="4680" b="9720">found</wd>

<space/>

<wd l="4747" t="9562" r="4915" b="9715">in</wd>

<space/>

<wd l="4987" t="9562" r="5251" b="9720">the</wd>

<space/>

<wd l="5323" t="9614" r="5808" b="9763">paper</wd>

<space/>

</ln>

<ln l="1450" t="9830" r="3394" b="10027" baseLine="9984" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="9830" r="2251" b="10027">(Baldwin</wd>

<space/>

<wd l="2314" t="9854" r="2467" b="9989">et</wd>

<space/>

<wd l="2525" t="9830" r="2774" b="10018">al.,</wd>

<space/>

<wd l="2846" t="9830" r="3394" b="10027">2015).</wd>

</ln>

</para>

<para l="1440" t="10109" r="5818" b="13834" alignment="justified" spaceBefore="3" fli="216" lsp="exactly" lspExact="271" language="en">

<ln l="1656" t="10109" r="5803" b="10310" baseLine="10258" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1656" t="10114" r="1891" b="10267">As</wd>

<space/>

<wd l="2011" t="10162" r="2107" b="10267">a</wd>

<space/>

<wd l="2213" t="10162" r="3365" b="10310">consequence,</wd>

<space/>

<wd l="3494" t="10162" r="3749" b="10267">we</wd>

<space/>

<wd l="3859" t="10109" r="4200" b="10267">first</wd>

<space/>

<wd l="4301" t="10109" r="4699" b="10267">built</wd>

<space/>

<wd l="4810" t="10162" r="4906" b="10267">a</wd>

<space/>

<wd l="5016" t="10162" r="5803" b="10310">sequence</wd>

<space/>

</ln>

<ln l="1440" t="10378" r="5808" b="10536" baseLine="10531">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="1440" t="10378" r="1982" b="10536">model</wd>

<space/>

<wd l="2035" t="10402" r="2203" b="10536">to</wd>

<space/>

<wd l="2261" t="10378" r="2822" b="10536">decide</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="2875" t="10378" r="3389" b="10536">which</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="3442" t="10378" r="3998" b="10536">tokens</wd>

<space/>

<wd l="4056" t="10378" r="4469" b="10536">need</wd>

<space/>

<wd l="4517" t="10402" r="4685" b="10536">to</wd>

<space/>

<wd l="4738" t="10378" r="4944" b="10536">be</wd>

<space/>

<wd l="4997" t="10378" r="5808" b="10536">corrected</wd>

<space/>

</run>

</ln>

<ln l="1445" t="10651" r="5794" b="10853" baseLine="10800">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="1445" t="10651" r="1757" b="10810">and</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="1824" t="10661" r="1982" b="10810">in</wd>

<space/>

<wd l="2050" t="10651" r="2477" b="10810">what</wd>

<space/>

</run>

<wd l="2534" t="10704" r="2928" b="10853"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">way</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><space/>

<wd l="3029" t="10656" r="3187" b="10805">A</wd>

<space/>

<wd l="3250" t="10651" r="3840" b="10853">typical</wd>

<space/>

<wd l="3907" t="10651" r="4829" b="10810">distinction</wd>

<space/>

<wd l="4891" t="10651" r="5083" b="10810">of</wd>

<space/>

<wd l="5131" t="10651" r="5395" b="10810">the</wd>

<space/>

<wd l="5462" t="10704" r="5794" b="10810">cor-</wd>

</run>

</ln>

<ln l="1440" t="10920" r="5818" b="11122" baseLine="11069" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="10920" r="2050" b="11078">rection</wd>

<space/>

<wd l="2136" t="10944" r="2587" b="11122">types</wd>

<space/>

<wd l="2688" t="10920" r="3230" b="11078">would</wd>

<space/>

<wd l="3322" t="10920" r="3523" b="11078">be</wd>

<space/>

<wd l="3614" t="10920" r="4114" b="11078">based</wd>

<space/>

<wd l="4210" t="10973" r="4421" b="11078">on</wd>

<space/>

<wd l="4512" t="10920" r="4776" b="11078">the</wd>

<space/>

<wd l="4867" t="10920" r="5539" b="11078">number</wd>

<space/>

<wd l="5630" t="10920" r="5818" b="11078">of</wd>

<space/>

</ln>

<ln l="1440" t="11194" r="5794" b="11395" baseLine="11342" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="11194" r="2002" b="11352">tokens</wd>

<space/>

<wd l="2078" t="11246" r="2174" b="11352">a</wd>

<space/>

<wd l="2242" t="11194" r="2707" b="11395">noisy</wd>

<space/>

<wd l="2779" t="11194" r="3264" b="11352">token</wd>

<space/>

<wd l="3336" t="11194" r="3648" b="11352">and</wd>

<space/>

<wd l="3710" t="11194" r="3912" b="11352">its</wd>

<space/>

<wd l="3989" t="11194" r="4800" b="11352">corrected</wd>

<space/>

<wd l="4867" t="11194" r="5294" b="11352">form</wd>

<space/>

<wd l="5362" t="11246" r="5794" b="11352">com-</wd>

</ln>

<ln l="1440" t="11462" r="5808" b="11664" baseLine="11611" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="11462" r="1939" b="11664">prises</wd>

<space/>

<wd l="2026" t="11462" r="2246" b="11621">of.</wd>

<space/>

<wd l="2381" t="11462" r="3298" b="11664">According</wd>

<space/>

<wd l="3374" t="11486" r="3538" b="11621">to</wd>

<space/>

<wd l="3614" t="11462" r="3922" b="11621">this</wd>

<space/>

<wd l="4008" t="11462" r="4843" b="11664">approach,</wd>

<space/>

<wd l="4939" t="11515" r="5246" b="11621">one</wd>

<space/>

<wd l="5323" t="11462" r="5808" b="11621">could</wd>

<space/>

</ln>

<ln l="1445" t="11731" r="5808" b="11933" baseLine="11885" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="11731" r="2400" b="11933">distinguish</wd>

<space/>

<wd l="2462" t="11731" r="3187" b="11890">between</wd>

<space/>

<wd l="3254" t="11755" r="4238" b="11918">one-to-one,</wd>

<space/>

<wd l="4320" t="11755" r="5424" b="11933">one-to-many</wd>

<space/>

<wd l="5496" t="11731" r="5808" b="11890">and</wd>

<space/>

</ln>

<ln l="1440" t="12005" r="5794" b="12206" baseLine="12154" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="12029" r="2549" b="12206">many-to-one</wd>

<space/>

<wd l="2650" t="12005" r="3605" b="12163">corrections</wd>

<space/>

<wd l="3710" t="12058" r="3922" b="12163">on</wd>

<space/>

<wd l="4013" t="12005" r="4277" b="12163">the</wd>

<space/>

<wd l="4373" t="12058" r="4656" b="12206">per</wd>

<space/>

<wd l="4742" t="12005" r="5222" b="12163">token</wd>

<space/>

<wd l="5314" t="12005" r="5794" b="12163">basis.</wd>

<space/>

</ln>

<ln l="1440" t="12274" r="5818" b="12475" baseLine="12427" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="12278" r="2261" b="12461">However,</wd>

<space/>

<wd l="2357" t="12274" r="2976" b="12432">instead</wd>

<space/>

<wd l="3053" t="12274" r="3245" b="12432">of</wd>

<space/>

<wd l="3312" t="12274" r="4070" b="12475">applying</wd>

<space/>

<wd l="4152" t="12274" r="4416" b="12432">the</wd>

<space/>

<wd l="4498" t="12274" r="5006" b="12432">above</wd>

<space/>

<wd l="5088" t="12298" r="5539" b="12475">types</wd>

<space/>

<wd l="5630" t="12274" r="5818" b="12432">of</wd>

<space/>

</ln>

<ln l="1445" t="12547" r="5794" b="12749" baseLine="12696" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="12547" r="2453" b="12734">corrections,</wd>

<space/>

<wd l="2530" t="12600" r="2784" b="12706">we</wd>

<space/>

<wd l="2851" t="12547" r="3677" b="12706">identified</wd>

<space/>

<wd l="3744" t="12600" r="3840" b="12706">a</wd>

<space/>

<wd l="3898" t="12600" r="4349" b="12706">more</wd>

<space/>

<wd l="4416" t="12547" r="5102" b="12706">detailed</wd>

<space/>

<wd l="5170" t="12571" r="5794" b="12749">catego-</wd>

</ln>

<ln l="1440" t="12816" r="5808" b="13018" baseLine="12970" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="12816" r="2107" b="12974">rization</wd>

<space/>

<wd l="2160" t="12816" r="2352" b="12974">of</wd>

<space/>

<wd l="2386" t="12816" r="2650" b="12974">the</wd>

<space/>

<wd l="2707" t="12816" r="3586" b="12974">correction</wd>

<space/>

<wd l="3634" t="12840" r="4090" b="13018">types</wd>

<space/>

<wd l="4147" t="12816" r="4459" b="12974">and</wd>

<space/>

<wd l="4507" t="12816" r="5117" b="12974">trained</wd>

<space/>

<wd l="5170" t="12869" r="5266" b="12974">a</wd>

<space/>

<wd l="5309" t="12816" r="5808" b="12974">linear</wd>

<space/>

</ln>

<ln l="1445" t="13090" r="5794" b="13291" baseLine="13238" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="13090" r="1915" b="13248">chain</wd>

<space/>

<wd l="2016" t="13094" r="2424" b="13248">CRF</wd>

<space/>

<wd l="2525" t="13090" r="3250" b="13291">utilizing</wd>

<space/>

<wd l="3355" t="13090" r="4176" b="13248">CRFsuite</wd>

<space/>

<wd l="4286" t="13090" r="5122" b="13286">(Okazaki,</wd>

<space/>

<wd l="5246" t="13094" r="5794" b="13286">2007).</wd>

<space/>

</ln>

<ln l="1440" t="13358" r="5808" b="13560" baseLine="13512" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="13358" r="1781" b="13517">The</wd>

<space/>

<wd l="1872" t="13358" r="2750" b="13517">correction</wd>

<space/>

<wd l="2837" t="13382" r="3288" b="13560">types</wd>

<space/>

<wd l="3384" t="13411" r="3480" b="13517">a</wd>

<space/>

<wd l="3562" t="13358" r="4046" b="13517">token</wd>

<space/>

<wd l="4138" t="13358" r="4618" b="13517">could</wd>

<space/>

<wd l="4704" t="13358" r="4906" b="13517">be</wd>

<space/>

<wd l="4997" t="13358" r="5808" b="13517">classified</wd>

<space/>

</ln>

<ln l="1445" t="13632" r="3360" b="13834" baseLine="13781" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="13685" r="1613" b="13790">as</wd>

<space/>

<wd l="1675" t="13685" r="2098" b="13790">were</wd>

<space/>

<wd l="2155" t="13632" r="2419" b="13790">the</wd>

<space/>

<wd l="2477" t="13632" r="3360" b="13834">following:</wd>

</ln>

</para>

<para l="1675" t="14093" r="5803" b="14842" alignment="justified" li="432" spaceBefore="203" fli="-216" lsp="exactly" lspExact="271" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="1675" t="14093" r="5803" b="14299" baseLine="14246">

<wd l="1675" t="14098" r="1882" b="14246">•</wd>

<tab position="1758"/>

<wd l="1882" t="14093" r="3235" b="14299"><run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">MissingApos</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="3312" t="14098" r="4037" b="14299">standing</wd>

<space/>

<wd l="4099" t="14098" r="4358" b="14256">for</wd>

<space/>

<wd l="4411" t="14098" r="4968" b="14256">tokens</wd>

<space/>

<wd l="5035" t="14098" r="5362" b="14256">that</wd>

<space/>

<wd l="5424" t="14098" r="5803" b="14299">only</wd>

<space/>

</run>

</ln>

<ln l="1882" t="14366" r="5794" b="14525" baseLine="14520" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1882" t="14366" r="2362" b="14525">differ</wd>

<space/>

<wd l="2419" t="14366" r="2846" b="14525">from</wd>

<space/>

<wd l="2914" t="14366" r="3317" b="14525">their</wd>

<space/>

<wd l="3384" t="14366" r="4190" b="14525">corrected</wd>

<space/>

<wd l="4253" t="14366" r="4896" b="14525">version</wd>

<space/>

<wd l="4958" t="14366" r="5131" b="14520">in</wd>

<space/>

<wd l="5194" t="14366" r="5462" b="14525">the</wd>

<space/>

<wd l="5530" t="14366" r="5794" b="14525">ab-</wd>

</ln>

<ln l="1886" t="14630" r="5755" b="14842" baseLine="14789">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1886" t="14693" r="2362" b="14798">sence</wd>

<space/>

<wd l="2419" t="14640" r="2611" b="14798">of</wd>

<space/>

<wd l="2654" t="14693" r="2856" b="14798">an</wd>

<space/>

<wd l="2918" t="14640" r="3869" b="14842">apostrophe</wd>

<space/>

<wd l="3936" t="14645" r="4301" b="14842">(e.g.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="4358" t="14640" r="4805" b="14842">youll</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="4867" t="14698" r="5064" b="14779">→</wd>

<space/>

</run>

<wd l="5122" t="14640" r="5755" b="14842"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">you’ll</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">),</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><nl orig="true"/>

</run>

</ln>

</para>

<para l="1675" t="15101" r="5803" b="15307" alignment="justified" li="432" spaceBefore="192" fli="-216" lsp="exactly" lspExact="263" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="1675" t="15101" r="5803" b="15307" baseLine="15259">

<wd l="1675" t="15106" r="1882" b="15259">•</wd>

<tab position="1761"/>

<wd l="1882" t="15101" r="3134" b="15307"><run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="3">MissingWS</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="3">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="3"><space/>

<wd l="3235" t="15106" r="3965" b="15307">standing</wd>

<space/>

<wd l="4042" t="15106" r="4301" b="15264">for</wd>

<space/>

<wd l="4373" t="15106" r="4930" b="15264">tokens</wd>

<space/>

<wd l="5016" t="15106" r="5342" b="15264">that</wd>

<space/>

<wd l="5424" t="15106" r="5803" b="15307">only</wd>

</run>

</ln>

</para>

</column>

<column l="6142" t="1205" r="10524" b="15317">

<table l="6610" t="1205" r="10020" b="3770" alignment="left" li="468" ri="504" spaceAfter="144">

<gridTable>

<gridCol>1583</gridCol>

<gridCol>984</gridCol>

<gridCol>843</gridCol>

<gridRow>326</gridRow>

<gridRow>250</gridRow>

<gridRow>278</gridRow>

<gridRow>279</gridRow>

<gridRow>278</gridRow>

<gridRow>279</gridRow>

<gridRow>249</gridRow>

<gridRow>322</gridRow>

<gridRow>304</gridRow>

</gridTable>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="top">

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="6610" t="1205" r="8193" b="1531" language="en">

<ln l="0" t="0" r="0" b="0" baseLine="0" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable">

<nl orig="true"/>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="8314" t="1291" r="9053" b="1493" alignment="centered" spaceAfter="26" lsp="exactly" lspExact="271" language="en">

<ln l="8314" t="1291" r="9053" b="1493" baseLine="1445" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="8314" t="1291" r="9053" b="1493">Training</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<bottomBorder type="single" width="10"/>

<para l="9418" t="1296" r="9778" b="1450" alignment="centered" spaceAfter="26" lsp="exactly" lspExact="271" language="en">

<ln l="9418" t="1296" r="9778" b="1450" baseLine="1445" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="9418" t="1296" r="9778" b="1450">Test</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<para l="6763" t="1565" r="8069" b="1771" alignment="left" li="153" lsp="exactly" lspExact="240" language="en">

<ln l="6763" t="1565" r="8069" b="1771" baseLine="1723" italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="10">

<wd l="6763" t="1565" r="8069" b="1771">MissingApos</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<para l="8525" t="1570" r="8842" b="1728" alignment="centered" lsp="exactly" lspExact="240" language="en">

<ln l="8525" t="1570" r="8842" b="1728" baseLine="1723" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="8525" t="1570" r="8842" b="1728">507</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<para l="9437" t="1570" r="9754" b="1728" alignment="centered" lsp="exactly" lspExact="240" language="en">

<ln l="9437" t="1570" r="9754" b="1728" baseLine="1723" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="9437" t="1570" r="9754" b="1728">369</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<rightBorder type="single" width="10"/>

<para l="6763" t="1838" r="7973" b="2045" alignment="left" li="153" lsp="exactly" lspExact="256" language="en">

<ln l="6763" t="1838" r="7973" b="2045" baseLine="1992" italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="17">

<wd l="6763" t="1838" r="7973" b="2045">MissingWS</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<para l="8544" t="1843" r="8846" b="2002" alignment="centered" lsp="exactly" lspExact="263" language="en">

<ln l="8544" t="1843" r="8846" b="2002" baseLine="1992" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-7">

<wd l="8544" t="1843" r="8846" b="2002">126</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<para l="9490" t="1843" r="9701" b="2002" alignment="centered" lsp="exactly" lspExact="263" language="en">

<ln l="9490" t="1843" r="9701" b="2002" baseLine="1992" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="9490" t="1843" r="9701" b="2002">76</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<rightBorder type="single" width="10"/>

<para l="6778" t="2117" r="7646" b="2328" alignment="left" li="153" lsp="exactly" lspExact="254" language="en">

<ln l="6778" t="2117" r="7646" b="2328" baseLine="2284">

<wd l="6778" t="2117" r="7646" b="2328"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="8">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="8">to</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="8">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="8">ED</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="600" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="8">≤</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="750" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="8">2</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="750" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="8"><nl orig="true"/>

</run>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<para l="8462" t="2117" r="8923" b="2299" alignment="centered" spaceAfter="11" lsp="exactly" lspExact="262" language="en">

<ln l="8462" t="2117" r="8923" b="2299" baseLine="2266" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="8462" t="2117" r="8923" b="2299">1,979</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<para l="9374" t="2112" r="9830" b="2299" alignment="centered" spaceAfter="11" lsp="exactly" lspExact="262" language="en">

<ln l="9374" t="2112" r="9830" b="2299" baseLine="2266" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-6">

<wd l="9374" t="2112" r="9830" b="2299">1,405</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="middle">

<rightBorder type="single" width="10"/>

<para l="6778" t="2390" r="7651" b="2602" alignment="left" li="153" lsp="exactly" lspExact="259" language="en">

<ln l="6778" t="2390" r="7651" b="2602" baseLine="2552">

<wd l="6778" t="2390" r="7651" b="2602"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="8">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="8">to</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="8">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="8">ED</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="600" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="8">≥</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="750" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="8">3</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="750" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="8"><nl orig="true"/>

</run>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<para l="8520" t="2390" r="8837" b="2544" alignment="centered" spaceAfter="17" lsp="exactly" lspExact="251" language="en">

<ln l="8520" t="2390" r="8837" b="2544" baseLine="2534" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="8520" t="2390" r="8837" b="2544">413</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="middle">

<para l="9437" t="2386" r="9758" b="2544" alignment="centered" spaceAfter="17" lsp="exactly" lspExact="251" language="en">

<ln l="9437" t="2386" r="9758" b="2544" baseLine="2534" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="9437" t="2390" r="9758" b="2544">292</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="5" gridRowTill="5" alignment="left" verticalAlignment="middle">

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="6778" t="2654" r="7656" b="2842" alignment="left" li="153" spaceAfter="5" lsp="exactly" lspExact="271" language="en">

<ln l="6778" t="2654" r="7656" b="2842" baseLine="2825">

<wd l="6778" t="2654" r="7656" b="2842"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="6">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="6">toM</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="6">ABB</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="6"><nl orig="true"/>

</run>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="5" gridRowTill="5" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="8525" t="2659" r="8842" b="2813" alignment="centered" spaceAfter="31" lsp="exactly" lspExact="247" language="en">

<ln l="8525" t="2659" r="8842" b="2813" baseLine="2808" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="8525" t="2659" r="8842" b="2813">917</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="5" gridRowTill="5" alignment="left" verticalAlignment="middle">

<bottomBorder type="single" width="10"/>

<para l="9437" t="2654" r="9758" b="2813" alignment="centered" spaceAfter="31" lsp="exactly" lspExact="247" language="en">

<ln l="9437" t="2654" r="9758" b="2813" baseLine="2808" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="9437" t="2654" r="9758" b="2813">634</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="6" gridRowTill="6" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<para l="6768" t="2928" r="7560" b="3091" alignment="left" li="153" lsp="exactly" lspExact="244" language="en">

<ln l="6768" t="2928" r="7560" b="3091" baseLine="3086" italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="2">

<wd l="6768" t="2928" r="7560" b="3091">Subtotal</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="6" gridRowTill="6" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<para l="8443" t="2933" r="8928" b="3120" alignment="centered" lsp="exactly" lspExact="244" language="en">

<ln l="8443" t="2933" r="8928" b="3120" baseLine="3086" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="8443" t="2938" r="8928" b="3120">3,942</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="6" gridRowTill="6" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<para l="9355" t="2933" r="9840" b="3120" alignment="centered" lsp="exactly" lspExact="244" language="en">

<ln l="9355" t="2933" r="9840" b="3120" baseLine="3086" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="9355" t="2933" r="9840" b="3120">2,776</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="7" gridRowTill="7" alignment="left" verticalAlignment="middle">

<rightBorder type="single" width="10"/>

<bottomBorder type="double" width="48"/>

<para l="6768" t="3202" r="6922" b="3365" alignment="left" li="153" spaceAfter="47" lsp="exactly" lspExact="258" language="en">

<ln l="6768" t="3202" r="6922" b="3365" baseLine="3355" italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6768" t="3202" r="6922" b="3365">O</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="7" gridRowTill="7" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<bottomBorder type="double" width="48"/>

<para l="8386" t="3211" r="8971" b="3394" alignment="centered" spaceAfter="46" lsp="exactly" lspExact="266" language="en">

<ln l="8386" t="3211" r="8971" b="3394" baseLine="3355" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="8386" t="3211" r="8971" b="3394">40,443</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="7" gridRowTill="7" alignment="left" verticalAlignment="middle">

<bottomBorder type="double" width="48"/>

<para l="9302" t="3206" r="9883" b="3394" alignment="centered" spaceAfter="46" lsp="exactly" lspExact="266" language="en">

<ln l="9302" t="3206" r="9883" b="3394" baseLine="3355" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="9302" t="3206" r="9883" b="3394">26,645</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="8" gridRowTill="8" alignment="left" verticalAlignment="middle">

<topBorder type="double" width="48"/>

<rightBorder type="single" width="10"/>

<para l="6763" t="3523" r="7277" b="3682" alignment="left" li="153" spaceAfter="27" lsp="exactly" lspExact="258" language="en">

<ln l="6763" t="3523" r="7277" b="3682" baseLine="3677" italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="4">

<wd l="6763" t="3523" r="7277" b="3682">Total</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="8" gridRowTill="8" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="double" width="48"/>

<para l="8386" t="3523" r="8971" b="3710" alignment="centered" spaceAfter="26" lsp="exactly" lspExact="266" language="en">

<ln l="8386" t="3523" r="8971" b="3710" baseLine="3677" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="8386" t="3523" r="8971" b="3710">44,385</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="8" gridRowTill="8" alignment="left" verticalAlignment="middle">

<topBorder type="double" width="48"/>

<para l="9302" t="3523" r="9874" b="3710" alignment="centered" spaceAfter="26" lsp="exactly" lspExact="266" language="en">

<ln l="9302" t="3523" r="9874" b="3710" baseLine="3677" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="9302" t="3528" r="9874" b="3710">29,421</wd>

</ln>

</para>

</cell>

</table>

<para l="6144" t="3974" r="10512" b="4445" alignment="justified" lsp="exactly" lspExact="269" language="en">

<ln l="6144" t="3974" r="10512" b="4176" baseLine="4123" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="3974" r="6624" b="4133">Table</wd>

<space/>

<wd l="6701" t="3979" r="6830" b="4133">1:</wd>

<space/>

<wd l="6912" t="3974" r="7968" b="4133">Distribution</wd>

<space/>

<wd l="8021" t="3974" r="8213" b="4133">of</wd>

<space/>

<wd l="8251" t="3974" r="8515" b="4133">the</wd>

<space/>

<wd l="8578" t="3974" r="9456" b="4133">correction</wd>

<space/>

<wd l="9509" t="3998" r="9960" b="4176">types</wd>

<space/>

<wd l="10022" t="3974" r="10195" b="4128">in</wd>

<space/>

<wd l="10243" t="3974" r="10512" b="4133">the</wd>

<space/>

</ln>

<ln l="6144" t="4243" r="7925" b="4445" baseLine="4397" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="4243" r="6821" b="4445">training</wd>

<space/>

<wd l="6883" t="4243" r="7195" b="4402">and</wd>

<space/>

<wd l="7248" t="4267" r="7555" b="4402">test</wd>

<space/>

<wd l="7618" t="4267" r="7925" b="4402">sets</wd>

</ln>

</para>

<para l="6586" t="4896" r="10502" b="5640" alignment="justified" li="432" spaceBefore="385" lsp="exactly" lspExact="271" language="en">

<ln l="6586" t="4896" r="10502" b="5054" baseLine="5045" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6586" t="4896" r="7066" b="5054">differ</wd>

<space/>

<wd l="7128" t="4896" r="7555" b="5054">from</wd>

<space/>

<wd l="7618" t="4896" r="8021" b="5054">their</wd>

<space/>

<wd l="8088" t="4896" r="8894" b="5054">corrected</wd>

<space/>

<wd l="8962" t="4896" r="9600" b="5054">version</wd>

<space/>

<wd l="9662" t="4896" r="9835" b="5050">in</wd>

<space/>

<wd l="9902" t="4896" r="10166" b="5054">the</wd>

<space/>

<wd l="10238" t="4896" r="10502" b="5054">ab-</wd>

</ln>

<ln l="6590" t="5165" r="10502" b="5366" baseLine="5314" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6590" t="5218" r="7066" b="5323">sence</wd>

<space/>

<wd l="7147" t="5165" r="7339" b="5323">of</wd>

<space/>

<wd l="7406" t="5218" r="7718" b="5323">one</wd>

<space/>

<wd l="7800" t="5218" r="7982" b="5323">or</wd>

<space/>

<wd l="8054" t="5218" r="8501" b="5323">more</wd>

<space/>

<wd l="8582" t="5165" r="9547" b="5366">whitespace</wd>

<space/>

<wd l="9634" t="5165" r="10502" b="5323">characters</wd>

<space/>

</ln>

<ln l="6590" t="5424" r="9811" b="5640" baseLine="5587">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6590" t="5443" r="6960" b="5640">(e.g.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7022" t="5438" r="8141" b="5640">whataburger</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="8208" t="5496" r="8405" b="5578">→</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="8467" t="5438" r="8899" b="5597">what</wd>

<space/>

<wd l="8947" t="5491" r="9053" b="5597">a</wd>

<space/>

</run>

<wd l="9115" t="5438" r="9811" b="5640"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">burger</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">),</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

<para l="6384" t="5870" r="10512" b="7435" alignment="justified" li="432" spaceBefore="169" fli="-216" lsp="exactly" lspExact="271" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="6384" t="5880" r="10512" b="6096" baseLine="6032">

<wd l="6384" t="5885" r="6600" b="6029">•</wd>

<tab position="6465"/>

<wd l="6600" t="5885" r="7531" b="6096"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">to</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">ED</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="600" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">≤</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="750" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">2</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="7680" t="5880" r="8410" b="6082">standing</wd>

<space/>

<wd l="8525" t="5880" r="8784" b="6038">for</wd>

<space/>

<wd l="8899" t="5880" r="9854" b="6038">corrections</wd>

<space/>

<wd l="9979" t="5880" r="10512" b="6038">where</wd>

<space/>

</run>

</ln>

<ln l="6581" t="6149" r="10512" b="6350" baseLine="6302" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6581" t="6202" r="6797" b="6307">no</wd>

<space/>

<wd l="6878" t="6149" r="7848" b="6350">whitespace</wd>

<space/>

<wd l="7934" t="6149" r="8803" b="6307">characters</wd>

<space/>

<wd l="8890" t="6149" r="9206" b="6307">had</wd>

<space/>

<wd l="9288" t="6173" r="9451" b="6307">to</wd>

<space/>

<wd l="9533" t="6149" r="9739" b="6307">be</wd>

<space/>

<wd l="9821" t="6149" r="10512" b="6307">inserted</wd>

<space/>

</ln>

<ln l="6586" t="6422" r="10512" b="6624" baseLine="6571" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6586" t="6422" r="6898" b="6581">and</wd>

<space/>

<wd l="6965" t="6422" r="7229" b="6581">the</wd>

<space/>

<wd l="7306" t="6422" r="8261" b="6624">augmented</wd>

<space/>

<wd l="8333" t="6422" r="8654" b="6581">edit</wd>

<space/>

<wd l="8726" t="6422" r="9437" b="6581">distance</wd>

<space/>

<wd l="9514" t="6422" r="10512" b="6619">(introduced</wd>

<space/>

</ln>

<ln l="6581" t="6691" r="10512" b="6893" baseLine="6840" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6581" t="6691" r="6754" b="6845">in</wd>

<space/>

<wd l="6893" t="6691" r="7541" b="6850">Section</wd>

<space/>

<wd l="7675" t="6696" r="8011" b="6888">4.2)</wd>

<space/>

<wd l="8155" t="6691" r="8885" b="6850">between</wd>

<space/>

<wd l="9019" t="6691" r="9283" b="6850">the</wd>

<space/>

<wd l="9422" t="6691" r="9888" b="6893">noisy</wd>

<space/>

<wd l="10027" t="6691" r="10512" b="6850">token</wd>

<space/>

</ln>

<ln l="6586" t="6965" r="10507" b="7123" baseLine="7114" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6586" t="6965" r="6898" b="7123">and</wd>

<space/>

<wd l="7037" t="6965" r="7234" b="7123">its</wd>

<space/>

<wd l="7382" t="6965" r="8366" b="7123">normalized</wd>

<space/>

<wd l="8501" t="6965" r="8928" b="7123">form</wd>

<space/>

<wd l="9067" t="7018" r="9394" b="7123">was</wd>

<space/>

<wd l="9547" t="6989" r="9701" b="7123">at</wd>

<space/>

<wd l="9840" t="6989" r="10262" b="7123">most</wd>

<space/>

<wd l="10406" t="6970" r="10507" b="7118">2</wd>

<space/>

</ln>

<ln l="6590" t="7238" r="8659" b="7435" baseLine="7382">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6590" t="7238" r="6960" b="7435">(e.g.</wd>

<space/>

</run>

<wd l="7027" t="7238" r="8659" b="7430"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">tmrw</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">→</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">tomorrow</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">),</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

<para l="6384" t="7675" r="10512" b="8693" alignment="justified" li="432" spaceBefore="174" fli="-216" lsp="exactly" lspExact="271" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="6384" t="7675" r="10507" b="7891" baseLine="7832">

<wd l="6384" t="7680" r="6600" b="7829">•</wd>

<tab position="6467"/>

<wd l="6600" t="7680" r="7531" b="7891"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">to</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">ED</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="600" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">≥</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="750" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">3</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="7603" t="7675" r="8328" b="7877">standing</wd>

<space/>

<wd l="8386" t="7675" r="8640" b="7834">for</wd>

<space/>

<wd l="8693" t="7675" r="9648" b="7834">corrections</wd>

<space/>

<wd l="9710" t="7675" r="10243" b="7834">where</wd>

<space/>

<wd l="10291" t="7728" r="10507" b="7834">no</wd>

<space/>

</run>

</ln>

<ln l="6581" t="7949" r="10512" b="8150" baseLine="8098" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6581" t="7949" r="7550" b="8150">whitespace</wd>

<space/>

<wd l="7618" t="7949" r="8491" b="8107">characters</wd>

<space/>

<wd l="8563" t="7949" r="8880" b="8107">had</wd>

<space/>

<wd l="8942" t="7973" r="9106" b="8107">to</wd>

<space/>

<wd l="9173" t="7949" r="9379" b="8107">be</wd>

<space/>

<wd l="9442" t="7949" r="10133" b="8107">inserted</wd>

<space/>

<wd l="10200" t="7949" r="10512" b="8107">and</wd>

<space/>

</ln>

<ln l="6581" t="8218" r="10498" b="8419" baseLine="8366" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6581" t="8218" r="6850" b="8376">the</wd>

<space/>

<wd l="6946" t="8218" r="7901" b="8419">augmented</wd>

<space/>

<wd l="7997" t="8218" r="8323" b="8376">edit</wd>

<space/>

<wd l="8419" t="8218" r="9130" b="8376">distance</wd>

<space/>

<wd l="9226" t="8270" r="9552" b="8376">was</wd>

<space/>

<wd l="9662" t="8242" r="9816" b="8376">at</wd>

<space/>

<wd l="9907" t="8218" r="10310" b="8376">least</wd>

<space/>

<wd l="10406" t="8222" r="10498" b="8376">3</wd>

<space/>

</ln>

<ln l="6590" t="8491" r="8170" b="8693" baseLine="8640">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6590" t="8496" r="6960" b="8693">(e.g.</wd>

<space/>

</run>

<wd l="7003" t="8491" r="8170" b="8693"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">plz</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">→</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">please</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">),</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

<para l="6384" t="8933" r="10512" b="9946" alignment="justified" li="432" spaceBefore="169" fli="-216" lsp="exactly" lspExact="271" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="6384" t="8933" r="10512" b="9134" baseLine="9084">

<wd l="6384" t="8938" r="6600" b="9082">•</wd>

<tab position="6465"/>

<wd l="6600" t="8933" r="7541" b="9120"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">toM</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-4">ABB</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4"><space/>

<wd l="7685" t="8933" r="8414" b="9134">standing</wd>

<space/>

<wd l="8530" t="8933" r="8789" b="9091">for</wd>

<space/>

<wd l="8899" t="8933" r="9859" b="9091">corrections</wd>

<space/>

<wd l="9979" t="8933" r="10512" b="9091">where</wd>

<space/>

</run>

</ln>

<ln l="6581" t="9202" r="10502" b="9403" baseLine="9355" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6581" t="9202" r="6970" b="9360">both</wd>

<space/>

<wd l="7018" t="9202" r="7982" b="9403">whitespace</wd>

<space/>

<wd l="8035" t="9202" r="8347" b="9360">and</wd>

<space/>

<wd l="8400" t="9202" r="9576" b="9403">alphanumeric</wd>

<space/>

<wd l="9634" t="9202" r="10502" b="9360">characters</wd>

<space/>

</ln>

<ln l="6581" t="9475" r="10512" b="9634" baseLine="9624" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6581" t="9475" r="6898" b="9634">had</wd>

<space/>

<wd l="6941" t="9499" r="7109" b="9634">to</wd>

<space/>

<wd l="7157" t="9475" r="7363" b="9634">be</wd>

<space/>

<wd l="7406" t="9475" r="8098" b="9634">inserted</wd>

<space/>

<wd l="8141" t="9499" r="8309" b="9634">to</wd>

<space/>

<wd l="8362" t="9475" r="8904" b="9634">obtain</wd>

<space/>

<wd l="8952" t="9528" r="9048" b="9634">a</wd>

<space/>

<wd l="9086" t="9475" r="9648" b="9634">token's</wd>

<space/>

<wd l="9706" t="9475" r="10512" b="9634">corrected</wd>

<space/>

</ln>

<ln l="6581" t="9734" r="9634" b="9946" baseLine="9893">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4"><wd l="6581" t="9744" r="7186" b="9902">variant</wd>

<space/>

<wd l="7248" t="9749" r="7613" b="9946">(e.g.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4"><wd l="7685" t="9744" r="7910" b="9902">lol</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4"><wd l="7973" t="9802" r="8170" b="9883">→</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4"><wd l="8242" t="9744" r="8731" b="9946">laugh</wd>

<space/>

<wd l="8794" t="9778" r="9072" b="9902">out</wd>

<space/>

</run>

<wd l="9125" t="9744" r="9634" b="9941"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">loud</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">).</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4"><nl orig="true"/>

</run>

</ln>

</para>

<para l="6144" t="10176" r="10517" b="12274" alignment="justified" spaceBefore="152" lsp="exactly" lspExact="271" language="en">

<ln l="6144" t="10176" r="10517" b="10378" baseLine="10325" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="10181" r="6451" b="10334">For</wd>

<space/>

<wd l="6494" t="10176" r="6758" b="10334">the</wd>

<space/>

<wd l="6816" t="10176" r="7190" b="10334">sake</wd>

<space/>

<wd l="7243" t="10176" r="7435" b="10334">of</wd>

<space/>

<wd l="7474" t="10176" r="8688" b="10378">completeness,</wd>

<space/>

<wd l="8746" t="10229" r="9000" b="10334">we</wd>

<space/>

<wd l="9058" t="10176" r="9634" b="10334">should</wd>

<space/>

<wd l="9682" t="10176" r="9994" b="10334">add</wd>

<space/>

<wd l="10042" t="10176" r="10368" b="10334">that</wd>

<space/>

<wd l="10421" t="10229" r="10517" b="10334">a</wd>

<space/>

</ln>

<ln l="6144" t="10445" r="10502" b="10646" baseLine="10598">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="6144" t="10445" r="6744" b="10603">further</wd>

<space/>

<wd l="6806" t="10445" r="7219" b="10603">class</wd>

<space/>

<wd l="7291" t="10445" r="7714" b="10603">label</wd>

<space/>

</run>

<wd l="7790" t="10450" r="8074" b="10642"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">O</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">)</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><space/>

<wd l="8146" t="10498" r="8477" b="10603">was</wd>

<space/>

<wd l="8554" t="10445" r="9451" b="10646">employed.</wd>

<space/>

<wd l="9557" t="10445" r="9989" b="10632">This,</wd>

<space/>

<wd l="10070" t="10445" r="10502" b="10603">how-</wd>

</run>

</ln>

<ln l="6149" t="10718" r="10507" b="10920" baseLine="10867" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="10771" r="6547" b="10906">ever,</wd>

<space/>

<wd l="6629" t="10718" r="7800" b="10920">corresponded</wd>

<space/>

<wd l="7862" t="10742" r="8026" b="10877">to</wd>

<space/>

<wd l="8093" t="10718" r="8357" b="10877">the</wd>

<space/>

<wd l="8429" t="10771" r="8798" b="10877">case</wd>

<space/>

<wd l="8861" t="10718" r="9331" b="10877">when</wd>

<space/>

<wd l="9394" t="10718" r="9830" b="10877">there</wd>

<space/>

<wd l="9893" t="10771" r="10224" b="10877">was</wd>

<space/>

<wd l="10291" t="10771" r="10507" b="10877">no</wd>

<space/>

</ln>

<ln l="6149" t="10987" r="10498" b="11189" baseLine="11141" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="10987" r="7032" b="11146">correction</wd>

<space/>

<wd l="7114" t="10987" r="7843" b="11189">required</wd>

<space/>

<wd l="7925" t="11011" r="8088" b="11146">to</wd>

<space/>

<wd l="8174" t="10987" r="8381" b="11146">be</wd>

<space/>

<wd l="8467" t="10987" r="9374" b="11189">performed</wd>

<space/>

<wd l="9456" t="10987" r="9715" b="11146">for</wd>

<space/>

<wd l="9797" t="11040" r="9893" b="11146">a</wd>

<space/>

<wd l="9974" t="10987" r="10498" b="11146">token.</wd>

<space/>

</ln>

<ln l="6144" t="11261" r="10502" b="11448" baseLine="11410" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="11266" r="6379" b="11419">As</wd>

<space/>

<wd l="6442" t="11261" r="7363" b="11419">mentioned</wd>

<space/>

<wd l="7421" t="11261" r="7973" b="11448">above,</wd>

<space/>

<wd l="8040" t="11314" r="8486" b="11419">more</wd>

<space/>

<wd l="8539" t="11261" r="8914" b="11419">than</wd>

<space/>

<wd l="8976" t="11266" r="9360" b="11419">90%</wd>

<space/>

<wd l="9427" t="11261" r="9619" b="11419">of</wd>

<space/>

<wd l="9658" t="11261" r="9926" b="11419">the</wd>

<space/>

<wd l="9979" t="11261" r="10502" b="11419">words</wd>

<space/>

</ln>

<ln l="6144" t="11530" r="10502" b="11731" baseLine="11678" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="11530" r="6317" b="11683">in</wd>

<space/>

<wd l="6384" t="11530" r="6773" b="11688">both</wd>

<space/>

<wd l="6845" t="11530" r="7109" b="11688">the</wd>

<space/>

<wd l="7181" t="11530" r="7858" b="11731">training</wd>

<space/>

<wd l="7934" t="11530" r="8246" b="11688">and</wd>

<space/>

<wd l="8314" t="11554" r="8621" b="11688">test</wd>

<space/>

<wd l="8698" t="11554" r="9010" b="11688">sets</wd>

<space/>

<wd l="9086" t="11530" r="9888" b="11731">belonged</wd>

<space/>

<wd l="9955" t="11554" r="10123" b="11688">to</wd>

<space/>

<wd l="10195" t="11530" r="10502" b="11688">this</wd>

<space/>

</ln>

<ln l="6149" t="11803" r="10502" b="12005" baseLine="11952" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="11827" r="6922" b="12005">category.</wd>

<space/>

<wd l="6998" t="11803" r="7478" b="11962">Table</wd>

<space/>

<wd l="7546" t="11808" r="7608" b="11957">1</wd>

<space/>

<wd l="7690" t="11803" r="8213" b="11962">shows</wd>

<space/>

<wd l="8266" t="11803" r="8530" b="11962">the</wd>

<space/>

<wd l="8582" t="11803" r="9581" b="11962">distribution</wd>

<space/>

<wd l="9629" t="11803" r="9821" b="11962">of</wd>

<space/>

<wd l="9850" t="11803" r="10118" b="11962">the</wd>

<space/>

<wd l="10171" t="11856" r="10502" b="11962">cor-</wd>

</ln>

<ln l="6144" t="12072" r="10190" b="12274" baseLine="12221" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="12072" r="6754" b="12230">rection</wd>

<space/>

<wd l="6806" t="12096" r="7258" b="12274">types</wd>

<space/>

<wd l="7325" t="12125" r="7541" b="12230">on</wd>

<space/>

<wd l="7594" t="12072" r="7982" b="12230">both</wd>

<space/>

<wd l="8035" t="12072" r="8304" b="12230">the</wd>

<space/>

<wd l="8357" t="12072" r="9034" b="12274">training</wd>

<space/>

<wd l="9096" t="12072" r="9408" b="12230">and</wd>

<space/>

<wd l="9461" t="12096" r="9763" b="12230">test</wd>

<space/>

<wd l="9826" t="12096" r="10190" b="12230">sets.</wd>

</ln>

</para>

<para l="6144" t="12528" r="8525" b="12749" alignment="left" spaceBefore="209" lsp="exactly" lspExact="274" language="en">

<ln l="6144" t="12528" r="8525" b="12749" baseLine="12696" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="7">

<wd l="6144" t="12528" r="6259" b="12696">4</wd>

<space/>

<wd l="6504" t="12533" r="7454" b="12749">Proposed</wd>

<space/>

<wd l="7517" t="12528" r="8525" b="12749">Approach</wd>

</ln>

</para>

<para l="6144" t="12960" r="10502" b="13973" alignment="justified" spaceBefore="132" lsp="exactly" lspExact="271" language="en">

<ln l="6149" t="12960" r="10502" b="13162" baseLine="13109" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="12965" r="6490" b="13118">Our</wd>

<space/>

<wd l="6533" t="12960" r="7330" b="13162">approach</wd>

<space/>

<wd l="7373" t="12960" r="8050" b="13118">consists</wd>

<space/>

<wd l="8107" t="12960" r="8299" b="13118">of</wd>

<space/>

<wd l="8328" t="13013" r="8424" b="13118">a</wd>

<space/>

<wd l="8472" t="13013" r="9264" b="13162">sequence</wd>

<space/>

<wd l="9307" t="12960" r="10003" b="13162">labeling</wd>

<space/>

<wd l="10051" t="12960" r="10502" b="13118">mod-</wd>

</ln>

<ln l="6144" t="13229" r="10502" b="13430" baseLine="13378" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6144" t="13229" r="6413" b="13387">ule</wd>

<space/>

<wd l="6499" t="13229" r="6811" b="13387">and</wd>

<space/>

<wd l="6888" t="13229" r="7354" b="13387">relies</wd>

<space/>

<wd l="7450" t="13282" r="7661" b="13387">on</wd>

<space/>

<wd l="7742" t="13229" r="8424" b="13430">lookups</wd>

<space/>

<wd l="8515" t="13229" r="8942" b="13387">from</wd>

<space/>

<wd l="9024" t="13282" r="9226" b="13387">an</wd>

<space/>

<wd l="9312" t="13229" r="10186" b="13430">efficiently</wd>

<space/>

<wd l="10267" t="13229" r="10502" b="13382">in-</wd>

</ln>

<ln l="6149" t="13498" r="10502" b="13699" baseLine="13651" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="13498" r="6662" b="13656">dexed</wd>

<space/>

<wd l="6744" t="13550" r="7378" b="13699">n-gram</wd>

<space/>

<wd l="7464" t="13550" r="8035" b="13699">corpus</wd>

<space/>

<wd l="8131" t="13498" r="8323" b="13656">of</wd>

<space/>

<wd l="8395" t="13498" r="9062" b="13699">English</wd>

<space/>

<wd l="9144" t="13522" r="9749" b="13656">tweets.</wd>

<space/>

<wd l="9922" t="13498" r="10502" b="13656">Subse-</wd>

</ln>

<ln l="6149" t="13771" r="10454" b="13973" baseLine="13920" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="13771" r="6830" b="13973">quently,</wd>

<space/>

<wd l="6893" t="13824" r="7147" b="13930">we</wd>

<space/>

<wd l="7210" t="13771" r="7930" b="13930">describe</wd>

<space/>

<wd l="7987" t="13771" r="8251" b="13930">the</wd>

<space/>

<wd l="8314" t="13771" r="8870" b="13930">details</wd>

<space/>

<wd l="8938" t="13771" r="9125" b="13930">of</wd>

<space/>

<wd l="9168" t="13771" r="9614" b="13930">these</wd>

<space/>

<wd l="9672" t="13771" r="10454" b="13930">modules.</wd>

</ln>

</para>

<para l="6144" t="14213" r="9941" b="14688" alignment="left" li="432" ri="576" spaceBefore="181" fli="-432" lsp="exactly" lspExact="269" language="en">

<ln l="6144" t="14213" r="9941" b="14414" baseLine="14366" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="14213" r="6408" b="14371">4.1</wd>

<space/>

<wd l="6643" t="14213" r="7507" b="14414">Sequence</wd>

<space/>

<wd l="7565" t="14213" r="8386" b="14414">Labeling</wd>

<space/>

<wd l="8443" t="14213" r="8717" b="14371">for</wd>

<space/>

<wd l="8770" t="14213" r="9941" b="14414">Determining</wd>

<space/>

</ln>

<ln l="6648" t="14486" r="8242" b="14688" baseLine="14635" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6648" t="14486" r="7651" b="14645">Correction</wd>

<space/>

<wd l="7709" t="14491" r="8242" b="14688">Types</wd>

</ln>

</para>

<para l="6144" t="14837" r="10512" b="15307" alignment="justified" spaceBefore="78" lsp="exactly" lspExact="267" language="en">

<ln l="6144" t="14837" r="10502" b="15038" baseLine="14986" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6144" t="14842" r="6379" b="14995">As</wd>

<space/>

<wd l="6456" t="14837" r="7090" b="15038">already</wd>

<space/>

<wd l="7157" t="14837" r="8083" b="14995">mentioned</wd>

<space/>

<wd l="8146" t="14837" r="8314" b="14990">in</wd>

<space/>

<wd l="8386" t="14837" r="9034" b="14995">Section</wd>

<space/>

<wd l="9101" t="14842" r="9250" b="15024">3,</wd>

<space/>

<wd l="9326" t="14837" r="9595" b="14995">the</wd>

<space/>

<wd l="9662" t="14837" r="9998" b="14995">first</wd>

<space/>

<wd l="10066" t="14890" r="10502" b="14995">com-</wd>

</ln>

<ln l="6144" t="15106" r="10512" b="15307" baseLine="15259" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6144" t="15130" r="6739" b="15307">ponent</wd>

<space/>

<wd l="6840" t="15106" r="7008" b="15259">in</wd>

<space/>

<wd l="7114" t="15158" r="7402" b="15264">our</wd>

<space/>

<wd l="7498" t="15106" r="8198" b="15307">pipeline</wd>

<space/>

<wd l="8299" t="15158" r="8630" b="15264">was</wd>

<space/>

<wd l="8741" t="15158" r="8837" b="15264">a</wd>

<space/>

<wd l="8933" t="15106" r="9432" b="15264">linear</wd>

<space/>

<wd l="9533" t="15106" r="10003" b="15264">chain</wd>

<space/>

<wd l="10104" t="15110" r="10512" b="15264">CRF</wd>

</ln>

</para>

</column>

</section>

<dd l="1440" t="15746" r="10524" b="15975">

<para l="5771" t="15792" r="6181" b="15941" alignment="centered" lsp="exactly" lspExact="223" language="en">

<ln l="5837" t="15792" r="6115" b="15941" baseLine="15936" underlined="none" subsuperscript="none" fontSize="950" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="25">

<wd l="5837" t="15792" r="6115" b="15941">121</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4318.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1439" marginTop="1248" marginRight="1386" marginBottom="1302" offsetX="-22" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1439" t="1248" r="10523" b="15346">

<column l="1439" t="1248" r="5821" b="15346">

<para l="1440" t="1320" r="5808" b="2606" alignment="justified" spaceBefore="13" lsp="exactly" lspExact="271" language="en">

<ln l="1450" t="1320" r="5808" b="1522" baseLine="1474" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="1320" r="2218" b="1522">(Lafferty</wd>

<space/>

<wd l="2285" t="1344" r="2443" b="1478">et</wd>

<space/>

<wd l="2506" t="1320" r="2755" b="1507">al.,</wd>

<space/>

<wd l="2832" t="1325" r="3379" b="1517">2001).</wd>

<space/>

<wd l="3470" t="1320" r="4142" b="1478">Besides</wd>

<space/>

<wd l="4210" t="1320" r="4474" b="1478">the</wd>

<space/>

<wd l="4541" t="1373" r="5299" b="1478">common</wd>

<space/>

<wd l="5362" t="1320" r="5808" b="1478">word</wd>

<space/>

</ln>

<ln l="1450" t="1594" r="5794" b="1795" baseLine="1742" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="1594" r="2069" b="1752">surface</wd>

<space/>

<wd l="2150" t="1594" r="2702" b="1781">forms,</wd>

<space/>

<wd l="2808" t="1594" r="3202" b="1752">such</wd>

<space/>

<wd l="3283" t="1646" r="3451" b="1752">as</wd>

<space/>

<wd l="3542" t="1594" r="3806" b="1752">the</wd>

<space/>

<wd l="3893" t="1594" r="5064" b="1795">capitalization</wd>

<space/>

<wd l="5146" t="1618" r="5794" b="1795">pattern,</wd>

<space/>

</ln>

<ln l="1440" t="1862" r="5808" b="2050" baseLine="2016" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="1862" r="1704" b="2021">the</wd>

<space/>

<wd l="1790" t="1862" r="2126" b="2021">first</wd>

<space/>

<wd l="2203" t="1862" r="2654" b="2021">letter</wd>

<space/>

<wd l="2736" t="1915" r="2918" b="2021">or</wd>

<space/>

<wd l="2995" t="1862" r="3797" b="2021">character</wd>

<space/>

<wd l="3878" t="1862" r="4584" b="2050">suffixes,</wd>

<space/>

<wd l="4680" t="1915" r="4934" b="2021">we</wd>

<space/>

<wd l="5011" t="1862" r="5510" b="2021">relied</wd>

<space/>

<wd l="5592" t="1915" r="5808" b="2021">on</wd>

<space/>

</ln>

<ln l="1440" t="2136" r="5794" b="2338" baseLine="2285" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="2136" r="1704" b="2294">the</wd>

<space/>

<wd l="1762" t="2136" r="2597" b="2338">following</wd>

<space/>

<wd l="2659" t="2136" r="3538" b="2338">dictionary</wd>

<space/>

<wd l="3590" t="2189" r="4406" b="2294">resources</wd>

<space/>

<wd l="4469" t="2189" r="4906" b="2338">upon</wd>

<space/>

<wd l="4963" t="2136" r="5794" b="2294">determin-</wd>

</ln>

<ln l="1440" t="2405" r="4978" b="2606" baseLine="2558" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="2405" r="1714" b="2606">ing</wd>

<space/>

<wd l="1771" t="2405" r="2040" b="2563">the</wd>

<space/>

<wd l="2093" t="2405" r="2779" b="2563">features</wd>

<space/>

<wd l="2842" t="2405" r="3096" b="2563">for</wd>

<space/>

<wd l="3149" t="2405" r="3413" b="2563">the</wd>

<space/>

<wd l="3470" t="2405" r="4344" b="2563">individual</wd>

<space/>

<wd l="4402" t="2405" r="4978" b="2563">words:</wd>

</ln>

</para>

<para l="1675" t="2861" r="5803" b="3605" alignment="justified" li="432" spaceBefore="181" fli="-216" lsp="exactly" lspExact="271" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="1675" t="2861" r="5803" b="3062" baseLine="3010" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1675" t="2885" r="1877" b="3010">•</wd>

<tab position="1745"/>

<wd l="1877" t="2861" r="2141" b="3019">the</wd>

<space/>

<wd l="2270" t="2866" r="3024" b="3019">SCOWL</wd>

<space/>

<wd l="3154" t="2861" r="4032" b="3062">dictionary</wd>

<space/>

<wd l="4157" t="2861" r="4642" b="3062">being</wd>

<space/>

<wd l="4766" t="2890" r="5107" b="3062">part</wd>

<space/>

<wd l="5237" t="2861" r="5429" b="3019">of</wd>

<space/>

<wd l="5539" t="2861" r="5803" b="3019">the</wd>

<space/>

</ln>

<ln l="1891" t="3130" r="5803" b="3331" baseLine="3283">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="modern" fontPitch="fixed" spacing="0"><wd l="1891" t="3149" r="2645" b="3326">aspell</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2803" t="3130" r="3197" b="3331">spell</wd>

<space/>

<wd l="3341" t="3130" r="4018" b="3288">checker</wd>

<space/>

<wd l="4147" t="3130" r="4752" b="3331">project</wd>

<space/>

<wd l="4891" t="3130" r="5803" b="3331">containing</wd>

<space/>

</run>

</ln>

<ln l="1882" t="3403" r="5054" b="3605" baseLine="3552" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1882" t="3403" r="2707" b="3562">canonical</wd>

<space/>

<wd l="2765" t="3403" r="3437" b="3605">English</wd>

<space/>

<wd l="3494" t="3403" r="4368" b="3605">dictionary</wd>

<space/>

<wd l="4430" t="3403" r="5054" b="3590">entries,</wd>

</ln>

</para>

<para l="1675" t="3854" r="5808" b="4325" alignment="justified" li="432" spaceBefore="183" fli="-216" lsp="exactly" lspExact="271" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="1675" t="3854" r="5808" b="4013" baseLine="4008" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1675" t="3878" r="1877" b="4008">•</wd>

<tab position="1748"/>

<wd l="1877" t="3854" r="2141" b="4013">the</wd>

<space/>

<wd l="2290" t="3854" r="3504" b="4013">normalization</wd>

<space/>

<wd l="3653" t="3854" r="4656" b="4013">dictionaries</wd>

<space/>

<wd l="4814" t="3854" r="5006" b="4013">of</wd>

<space/>

<wd l="5141" t="3859" r="5501" b="4013">Han</wd>

<space/>

<wd l="5654" t="3878" r="5808" b="4013">et</wd>

<space/>

</ln>

<ln l="1882" t="4128" r="4608" b="4325" baseLine="4277" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1882" t="4128" r="2078" b="4286">al.</wd>

<space/>

<wd l="2150" t="4133" r="2717" b="4325">(2012)</wd>

<space/>

<wd l="2784" t="4128" r="3096" b="4286">and</wd>

<space/>

<wd l="3149" t="4128" r="3446" b="4286">Liu</wd>

<space/>

<wd l="3514" t="4152" r="3667" b="4286">et</wd>

<space/>

<wd l="3725" t="4128" r="3922" b="4286">al.</wd>

<space/>

<wd l="3994" t="4133" r="4608" b="4325">(2012),</wd>

</ln>

</para>

<para l="1675" t="4579" r="5803" b="5597" alignment="justified" li="432" spaceBefore="183" fli="-216" lsp="exactly" lspExact="271" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="1675" t="4579" r="5803" b="4781" baseLine="4733" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1675" t="4603" r="1877" b="4733">•</wd>

<tab position="1748"/>

<wd l="1877" t="4579" r="2141" b="4738">the</wd>

<space/>

<wd l="2251" t="4579" r="3504" b="4766">5,307-element</wd>

<space/>

<wd l="3605" t="4579" r="4819" b="4738">normalization</wd>

<space/>

<wd l="4925" t="4579" r="5803" b="4781">dictionary</wd>

<space/>

</ln>

<ln l="1882" t="4853" r="5794" b="5054" baseLine="5002">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1882" t="4853" r="2525" b="5011">derived</wd>

<space/>

<wd l="2669" t="4853" r="3096" b="5011">from</wd>

<space/>

<wd l="3240" t="4853" r="3509" b="5011">the</wd>

<space/>

<wd l="3653" t="4853" r="4162" b="5054">portal</wd>

<space/>

</run>

<wd l="4320" t="4872" r="5794" b="5050"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="modern" fontPitch="fixed" spacing="0">noslang.com</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

</ln>

<ln l="1877" t="5122" r="5794" b="5323" baseLine="5270" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1877" t="5122" r="2410" b="5280">which</wd>

<space/>

<wd l="2491" t="5174" r="2861" b="5323">map</wd>

<space/>

<wd l="2947" t="5174" r="3710" b="5280">common</wd>

<space/>

<wd l="3797" t="5122" r="4291" b="5280">social</wd>

<space/>

<wd l="4373" t="5122" r="4910" b="5280">media</wd>

<space/>

<wd l="4992" t="5122" r="5794" b="5280">abbrevia-</wd>

</ln>

<ln l="1877" t="5395" r="4440" b="5597" baseLine="5544" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1877" t="5395" r="2294" b="5554">tions</wd>

<space/>

<wd l="2357" t="5419" r="2520" b="5554">to</wd>

<space/>

<wd l="2578" t="5395" r="2986" b="5554">their</wd>

<space/>

<wd l="3038" t="5395" r="3835" b="5597">complete</wd>

<space/>

<wd l="3888" t="5395" r="4440" b="5554">forms.</wd>

</ln>

</para>

<para l="1440" t="5846" r="5808" b="8486" alignment="justified" spaceBefore="198" fli="216" lsp="exactly" lspExact="270" language="en">

<ln l="1656" t="5846" r="5794" b="6048" baseLine="5995" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1656" t="5851" r="1963" b="6005">For</wd>

<space/>

<wd l="2021" t="5846" r="2419" b="6005">each</wd>

<space/>

<wd l="2477" t="5846" r="3005" b="6034">token,</wd>

<space/>

<wd l="3077" t="5846" r="3523" b="6005">word</wd>

<space/>

<wd l="3581" t="5870" r="3955" b="6048">type</wd>

<space/>

<wd l="4018" t="5846" r="4699" b="6005">features</wd>

<space/>

<wd l="4766" t="5899" r="5194" b="6005">were</wd>

<space/>

<wd l="5256" t="5899" r="5794" b="6048">gener-</wd>

</ln>

<ln l="1445" t="6115" r="5803" b="6317" baseLine="6269" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="6115" r="1805" b="6274">ated</wd>

<space/>

<wd l="1872" t="6115" r="2347" b="6317">along</wd>

<space/>

<wd l="2414" t="6115" r="2808" b="6274">with</wd>

<space/>

<wd l="2870" t="6115" r="3134" b="6274">the</wd>

<space/>

<wd l="3202" t="6115" r="3648" b="6274">word</wd>

<space/>

<wd l="3710" t="6139" r="4162" b="6317">types</wd>

<space/>

<wd l="4238" t="6115" r="4430" b="6274">of</wd>

<space/>

<wd l="4478" t="6115" r="4680" b="6274">its</wd>

<space/>

<wd l="4752" t="6115" r="5803" b="6317">neighboring</wd>

<space/>

</ln>

<ln l="1440" t="6389" r="5808" b="6590" baseLine="6538" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="6389" r="2050" b="6547">tokens.</wd>

<space/>

<wd l="2155" t="6389" r="2491" b="6547">The</wd>

<space/>

<wd l="2558" t="6394" r="2947" b="6547">POS</wd>

<space/>

<wd l="3019" t="6413" r="3365" b="6590">tags</wd>

<space/>

<wd l="3442" t="6389" r="4186" b="6590">assigned</wd>

<space/>

<wd l="4248" t="6413" r="4416" b="6547">to</wd>

<space/>

<wd l="4488" t="6389" r="4882" b="6547">each</wd>

<space/>

<wd l="4944" t="6389" r="5429" b="6547">token</wd>

<space/>

<wd l="5496" t="6389" r="5808" b="6547">and</wd>

<space/>

</ln>

<ln l="1440" t="6658" r="5808" b="6859" baseLine="6811" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="6658" r="1637" b="6816">its</wd>

<space/>

<wd l="1714" t="6658" r="2765" b="6859">neighboring</wd>

<space/>

<wd l="2837" t="6658" r="3398" b="6816">tokens</wd>

<space/>

<wd l="3475" t="6658" r="3691" b="6859">by</wd>

<space/>

<wd l="3763" t="6658" r="4027" b="6816">the</wd>

<space/>

<wd l="4099" t="6658" r="4728" b="6816">Twitter</wd>

<space/>

<wd l="4790" t="6662" r="5179" b="6816">POS</wd>

<space/>

<wd l="5261" t="6682" r="5808" b="6859">tagger</wd>

<space/>

</ln>

<ln l="1450" t="6931" r="5798" b="7133" baseLine="7080" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="6931" r="2165" b="7133">(Gimpel</wd>

<space/>

<wd l="2222" t="6955" r="2376" b="7090">et</wd>

<space/>

<wd l="2429" t="6931" r="2678" b="7118">al.,</wd>

<space/>

<wd l="2746" t="6936" r="3240" b="7128">2011)</wd>

<space/>

<wd l="3302" t="6984" r="3725" b="7090">were</wd>

<space/>

<wd l="3782" t="6931" r="4123" b="7090">also</wd>

<space/>

<wd l="4176" t="6931" r="4834" b="7090">utilized</wd>

<space/>

<wd l="4886" t="6984" r="5054" b="7090">as</wd>

<space/>

<wd l="5117" t="6931" r="5798" b="7090">features</wd>

<space/>

</ln>

<ln l="1440" t="7200" r="5798" b="7402" baseLine="7354" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="7200" r="1613" b="7354">in</wd>

<space/>

<wd l="1685" t="7200" r="1949" b="7358">the</wd>

<space/>

<wd l="2026" t="7205" r="2434" b="7358">CRF</wd>

<space/>

<wd l="2506" t="7200" r="3096" b="7358">model.</wd>

<space/>

<wd l="3230" t="7200" r="3571" b="7358">The</wd>

<space/>

<wd l="3643" t="7200" r="4272" b="7358">Twitter</wd>

<space/>

<wd l="4339" t="7205" r="4728" b="7358">POS</wd>

<space/>

<wd l="4814" t="7224" r="5078" b="7402">tag</wd>

<space/>

<wd l="5165" t="7224" r="5395" b="7358">set</wd>

<space/>

<wd l="5467" t="7253" r="5798" b="7358">was</wd>

<space/>

</ln>

<ln l="1440" t="7474" r="5808" b="7675" baseLine="7622">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1440" t="7474" r="1968" b="7632">useful</wd>

<space/>

<wd l="2045" t="7498" r="2213" b="7632">to</wd>

<space/>

<wd l="2290" t="7526" r="2525" b="7661">us,</wd>

<space/>

<wd l="2621" t="7526" r="2789" b="7632">as</wd>

<space/>

<wd l="2870" t="7474" r="2995" b="7632">it</wd>

<space/>

<wd l="3072" t="7474" r="3782" b="7632">contains</wd>

<space/>

<wd l="3869" t="7526" r="3965" b="7632">a</wd>

<space/>

<wd l="4046" t="7498" r="4747" b="7675">separate</wd>

<space/>

<wd l="4824" t="7498" r="5088" b="7675">tag</wd>

<space/>

</run>

<wd l="5174" t="7474" r="5467" b="7670"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">(</run>

<run bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">G</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">)</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="5549" t="7474" r="5808" b="7632">for</wd>

<space/>

</run>

</ln>

<ln l="1440" t="7742" r="5794" b="7944" baseLine="7896">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1440" t="7742" r="2419" b="7901">multi-word</wd>

<space/>

<wd l="2491" t="7742" r="3648" b="7901">abbreviations</wd>

<space/>

<wd l="3734" t="7747" r="4104" b="7944">(e.g.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="4190" t="7742" r="4402" b="7944">ily</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="4469" t="7742" r="4728" b="7901">for</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="4786" t="7752" r="4877" b="7896">I</wd>

<space/>

<wd l="4939" t="7742" r="5290" b="7901">love</wd>

<space/>

</run>

<wd l="5352" t="7747" r="5794" b="7944"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">you</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">),</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

</ln>

<ln l="1440" t="8016" r="5803" b="8218" baseLine="8165" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="8016" r="1973" b="8174">which</wd>

<space/>

<wd l="2026" t="8069" r="2357" b="8174">was</wd>

<space/>

<wd l="2419" t="8016" r="3187" b="8218">expected</wd>

<space/>

<wd l="3240" t="8040" r="3403" b="8174">to</wd>

<space/>

<wd l="3461" t="8016" r="3667" b="8174">be</wd>

<space/>

<wd l="3720" t="8016" r="4272" b="8218">highly</wd>

<space/>

<wd l="4330" t="8016" r="5179" b="8174">indicative</wd>

<space/>

<wd l="5232" t="8016" r="5491" b="8174">for</wd>

<space/>

<wd l="5539" t="8016" r="5803" b="8174">the</wd>

<space/>

</ln>

<ln l="1445" t="8285" r="3768" b="8486" baseLine="8438">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1445" t="8285" r="2328" b="8443">correction</wd>

<space/>

<wd l="2381" t="8309" r="2755" b="8486">type</wd>

<space/>

</run>

<wd l="2827" t="8285" r="3768" b="8472"><run italic="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">toM</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">ABB</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><nl orig="true"/>

</run>

</ln>

</para>

<para l="1440" t="8558" r="5808" b="9528" alignment="justified" fli="216" lsp="exactly" lspExact="268" language="en">

<ln l="1656" t="8558" r="5803" b="8717" baseLine="8707" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="8563" r="1843" b="8712">In</wd>

<space/>

<wd l="2026" t="8558" r="2482" b="8717">order</wd>

<space/>

<wd l="2659" t="8582" r="2822" b="8717">to</wd>

<space/>

<wd l="3005" t="8558" r="3211" b="8717">be</wd>

<space/>

<wd l="3394" t="8558" r="3754" b="8717">able</wd>

<space/>

<wd l="3931" t="8582" r="4099" b="8717">to</wd>

<space/>

<wd l="4286" t="8558" r="5362" b="8717">discriminate</wd>

<space/>

<wd l="5539" t="8558" r="5803" b="8717">the</wd>

<space/>

</ln>

<ln l="1445" t="8822" r="5803" b="9029" baseLine="8981">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1445" t="8822" r="2654" b="9029">MissingWS</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2813" t="8827" r="3274" b="9014">class,</wd>

<space/>

<wd l="3461" t="8880" r="3715" b="8986">we</wd>

<space/>

<wd l="3869" t="8827" r="4800" b="8986">introduced</wd>

<space/>

<wd l="4958" t="8880" r="5054" b="8986">a</wd>

<space/>

<wd l="5198" t="8827" r="5803" b="8986">feature</wd>

<space/>

</run>

</ln>

<ln l="1440" t="9101" r="5808" b="9302" baseLine="9250">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1440" t="9101" r="1973" b="9259">which</wd>

<space/>

<wd l="2054" t="9101" r="2822" b="9259">indicates</wd>

<space/>

<wd l="2914" t="9101" r="3173" b="9259">for</wd>

<space/>

<wd l="3259" t="9154" r="3355" b="9259">a</wd>

<space/>

<wd l="3432" t="9101" r="3917" b="9259">token</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4003" t="9115" r="4070" b="9259">t</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4166" t="9101" r="5117" b="9302">originating</wd>

<space/>

<wd l="5203" t="9101" r="5630" b="9259">from</wd>

<space/>

<wd l="5712" t="9154" r="5808" b="9259">a</wd>

<space/>

</run>

</ln>

<ln l="1440" t="9370" r="3715" b="9528" baseLine="9518" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1440" t="9394" r="1915" b="9528">tweet</wd>

<space/>

<wd l="1968" t="9370" r="2674" b="9528">whether</wd>

<space/>

<wd l="2726" t="9370" r="2990" b="9528">the</wd>

<space/>

<wd l="3048" t="9370" r="3715" b="9528">relation</wd>

</ln>

</para>

<para l="2582" t="9850" r="4661" b="10214" alignment="left" li="1152" ri="1152" spaceBefore="340" fli="144" lsp="exactly" lspExact="149" language="en">

<ln l="2741" t="9850" r="4661" b="10075" baseLine="10016">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2741" t="9917" r="3139" b="10022">max</wd>

<space/>

</run>

<wd l="3346" t="9850" r="4238" b="10075"><run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="0">freq</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="750" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="0">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="0">T</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="0">s</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="0">)</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1350" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="0"><wd l="4330" t="9878" r="4469" b="10051">≥</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4550" t="9922" r="4661" b="10022">τ</wd>

<space/>

</run>

</ln>

<ln l="2582" t="10051" r="3283" b="10214" baseLine="10166">

<wd l="2582" t="10051" r="3283" b="10214"><run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="0">s</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="600" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="0">∈</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="0">split</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="750" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="0">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="750" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="0">)</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="750" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

<para l="1440" t="10464" r="5813" b="14222" alignment="justified" spaceBefore="224" lsp="exactly" lspExact="271" language="en">

<ln l="1440" t="10464" r="5794" b="10685" baseLine="10647">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1440" t="10498" r="1958" b="10685">holds,</wd>

<space/>

<wd l="2074" t="10498" r="2606" b="10656">where</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="2712" t="10555" r="2822" b="10656">τ</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="2923" t="10498" r="3058" b="10656">is</wd>

<space/>

<wd l="3168" t="10550" r="3264" b="10656">a</wd>

<space/>

<wd l="3360" t="10498" r="4171" b="10656">threshold</wd>

<space/>

<wd l="4272" t="10498" r="5131" b="10656">calibrated</wd>

<space/>

<wd l="5227" t="10522" r="5390" b="10656">to</wd>

<space/>

</run>

<wd l="5510" t="10464" r="5794" b="10656"><run italic="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">10</run>

<run italic="true" underlined="none" subsuperscript="superscript" fontSize="1050" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">6</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="750" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2"><space/>

</run>

</ln>

<ln l="1440" t="10757" r="5808" b="10982" baseLine="10921">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1440" t="10771" r="1939" b="10930">based</wd>

<space/>

<wd l="2006" t="10824" r="2222" b="10930">on</wd>

<space/>

<wd l="2285" t="10771" r="2549" b="10930">the</wd>

<space/>

<wd l="2616" t="10771" r="3288" b="10973">training</wd>

<space/>

<wd l="3365" t="10795" r="3643" b="10958">set,</wd>

<space/>

</run>

<wd l="3730" t="10757" r="4627" b="10982"><run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">freq</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="750" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">T</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">s</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">)</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="4709" t="10771" r="4843" b="10930">is</wd>

<space/>

<wd l="4920" t="10824" r="5016" b="10930">a</wd>

<space/>

<wd l="5078" t="10771" r="5808" b="10930">function</wd>

<space/>

</run>

</ln>

<ln l="1440" t="11040" r="5808" b="11242" baseLine="11194" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="11040" r="1973" b="11198">which</wd>

<space/>

<wd l="2030" t="11064" r="2626" b="11198">returns</wd>

<space/>

<wd l="2693" t="11040" r="2957" b="11198">the</wd>

<space/>

<wd l="3014" t="11040" r="3878" b="11242">frequency</wd>

<space/>

<wd l="3941" t="11040" r="4406" b="11198">value</wd>

<space/>

<wd l="4469" t="11040" r="5362" b="11198">associated</wd>

<space/>

<wd l="5419" t="11040" r="5808" b="11198">with</wd>

<space/>

</ln>

<ln l="1445" t="11314" r="5794" b="11515" baseLine="11462">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1445" t="11366" r="1541" b="11472">a</wd>

<space/>

<wd l="1603" t="11314" r="2088" b="11515">string</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="2155" t="11366" r="2242" b="11472">s</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="2314" t="11314" r="3163" b="11515">according</wd>

<space/>

<wd l="3226" t="11338" r="3389" b="11472">to</wd>

<space/>

<wd l="3451" t="11314" r="3715" b="11472">the</wd>

<space/>

<wd l="3778" t="11314" r="4416" b="11515">Google</wd>

<space/>

<wd l="4498" t="11318" r="4709" b="11467">1T</wd>

<space/>

<wd l="4776" t="11314" r="5405" b="11515">5-gram</wd>

<space/>

<wd l="5462" t="11366" r="5794" b="11472">cor-</wd>

</run>

</ln>

<ln l="1440" t="11568" r="5803" b="11794" baseLine="11731">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1440" t="11635" r="1733" b="11784">pus</wd>

<space/>

<wd l="1810" t="11582" r="2122" b="11741">and</wd>

<space/>

<wd l="2179" t="11582" r="2448" b="11741">the</wd>

<space/>

<wd l="2510" t="11582" r="3240" b="11741">function</wd>

<space/>

</run>

<wd l="3307" t="11568" r="3965" b="11794"><run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">split</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">)</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="4046" t="11606" r="4642" b="11741">returns</wd>

<space/>

<wd l="4714" t="11582" r="4978" b="11741">the</wd>

<space/>

<wd l="5050" t="11606" r="5285" b="11741">set</wd>

<space/>

<wd l="5347" t="11582" r="5539" b="11741">of</wd>

<space/>

<wd l="5592" t="11582" r="5803" b="11741">all</wd>

<space/>

</run>

</ln>

<ln l="1440" t="11856" r="5794" b="12058" baseLine="12005">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1440" t="11856" r="1704" b="12014">the</wd>

<space/>

<wd l="1795" t="11856" r="2510" b="12058">possible</wd>

<space/>

<wd l="2611" t="11856" r="3053" b="12058">splits</wd>

<space/>

<wd l="3154" t="11856" r="3346" b="12014">of</wd>

<space/>

<wd l="3418" t="11856" r="3902" b="12014">token</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="3994" t="11870" r="4061" b="12014">t</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="4166" t="11856" r="4560" b="12014">such</wd>

<space/>

<wd l="4646" t="11856" r="4978" b="12014">that</wd>

<space/>

<wd l="5064" t="11856" r="5261" b="12014">its</wd>

<space/>

<wd l="5362" t="11909" r="5794" b="12014">com-</wd>

</run>

</ln>

<ln l="1440" t="12125" r="5794" b="12326" baseLine="12274" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="12149" r="2112" b="12326">ponents</wd>

<space/>

<wd l="2208" t="12178" r="2467" b="12283">are</wd>

<space/>

<wd l="2558" t="12125" r="2770" b="12283">all</wd>

<space/>

<wd l="2861" t="12125" r="3706" b="12283">contained</wd>

<space/>

<wd l="3792" t="12125" r="3960" b="12278">in</wd>

<space/>

<wd l="4046" t="12125" r="4310" b="12283">the</wd>

<space/>

<wd l="4402" t="12130" r="5150" b="12283">SCOWL</wd>

<space/>

<wd l="5242" t="12125" r="5794" b="12283">dictio-</wd>

</ln>

<ln l="1440" t="12379" r="5798" b="12605" baseLine="12545">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1440" t="12446" r="1858" b="12595">nary.</wd>

<space/>

<wd l="1954" t="12398" r="2256" b="12552">For</wd>

<space/>

<wd l="2309" t="12394" r="3024" b="12552">instance</wd>

<space/>

</run>

<wd l="3096" t="12379" r="5122" b="12605"><run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">split</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">(“</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">whataburger</run>

<run italic="true" underlined="none" subsuperscript="superscript" fontSize="1150" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">”</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">)</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="5198" t="12418" r="5798" b="12552">returns</wd>

<space/>

</run>

</ln>

<ln l="1445" t="12667" r="5813" b="12869" baseLine="12816">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1445" t="12720" r="1541" b="12826">a</wd>

<space/>

<wd l="1632" t="12691" r="1862" b="12826">set</wd>

<space/>

<wd l="1949" t="12667" r="2141" b="12826">of</wd>

<space/>

<wd l="2222" t="12667" r="2664" b="12869">splits</wd>

<space/>

<wd l="2755" t="12667" r="3576" b="12869">including</wd>

<space/>

</run>

<wd l="3667" t="12667" r="4190" b="12826"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">“</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">what</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="4267" t="12720" r="4373" b="12826">a</wd>

<space/>

</run>

<wd l="4464" t="12667" r="5184" b="12869"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">burger</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">”,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

<wd l="5290" t="12667" r="5813" b="12826"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">“</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">what</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

</ln>

<ln l="1440" t="12936" r="5808" b="13138" baseLine="13090">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1440" t="12989" r="1546" b="13094">a</wd>

<space/>

<wd l="1642" t="12936" r="2035" b="13138">burg</wd>

<space/>

</run>

<wd l="2131" t="12941" r="2405" b="13094"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">er</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">”</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="2501" t="12936" r="2813" b="13094">and</wd>

<space/>

</run>

<wd l="2909" t="12936" r="3427" b="13094"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">“</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">what</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="3514" t="12936" r="3725" b="13094">ab</wd>

<space/>

</run>

<wd l="3821" t="12941" r="4440" b="13138"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">urger</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">”.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="4618" t="12941" r="4853" b="13094">As</wd>

<space/>

<wd l="4949" t="12936" r="5386" b="13094">there</wd>

<space/>

<wd l="5472" t="12936" r="5611" b="13094">is</wd>

<space/>

<wd l="5712" t="12989" r="5808" b="13094">a</wd>

<space/>

</run>

</ln>

<ln l="1450" t="13210" r="5794" b="13411" baseLine="13358">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1450" t="13210" r="1819" b="13411">split</wd>

<space/>

<wd l="1886" t="13210" r="2208" b="13406">(i.e.</wd>

<space/>

</run>

<wd l="2285" t="13210" r="2808" b="13368"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">“</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">what</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="2866" t="13262" r="2971" b="13368">a</wd>

<space/>

</run>

<wd l="3043" t="13210" r="3782" b="13411"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">burger</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">”)</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="3854" t="13210" r="4181" b="13368">that</wd>

<space/>

<wd l="4243" t="13210" r="4382" b="13368">is</wd>

<space/>

<wd l="4459" t="13210" r="5424" b="13411">sufficiently</wd>

<space/>

<wd l="5491" t="13210" r="5794" b="13368">fre-</wd>

</run>

</ln>

<ln l="1445" t="13478" r="5798" b="13680" baseLine="13632" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="13502" r="1925" b="13680">quent</wd>

<space/>

<wd l="1973" t="13478" r="2827" b="13680">according</wd>

<space/>

<wd l="2875" t="13502" r="3038" b="13637">to</wd>

<space/>

<wd l="3086" t="13478" r="3355" b="13637">the</wd>

<space/>

<wd l="3398" t="13531" r="4032" b="13680">n-gram</wd>

<space/>

<wd l="4075" t="13531" r="4699" b="13680">corpus,</wd>

<space/>

<wd l="4757" t="13531" r="5006" b="13637">we</wd>

<space/>

<wd l="5054" t="13478" r="5414" b="13637">take</wd>

<space/>

<wd l="5458" t="13478" r="5582" b="13637">it</wd>

<space/>

<wd l="5630" t="13531" r="5798" b="13637">as</wd>

<space/>

</ln>

<ln l="1445" t="13752" r="5803" b="13954" baseLine="13901" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="13805" r="1642" b="13910">an</wd>

<space/>

<wd l="1704" t="13752" r="2578" b="13910">indication</wd>

<space/>

<wd l="2635" t="13752" r="2962" b="13910">that</wd>

<space/>

<wd l="3019" t="13752" r="3283" b="13910">the</wd>

<space/>

<wd l="3350" t="13752" r="4018" b="13954">original</wd>

<space/>

<wd l="4080" t="13752" r="4565" b="13910">token</wd>

<space/>

<wd l="4627" t="13752" r="5290" b="13910">omitted</wd>

<space/>

<wd l="5357" t="13805" r="5803" b="13910">some</wd>

<space/>

</ln>

<ln l="1440" t="14021" r="5520" b="14222" baseLine="14174" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="14021" r="2410" b="14222">whitespace</wd>

<space/>

<wd l="2467" t="14021" r="3341" b="14179">characters</wd>

<space/>

<wd l="3403" t="14021" r="3730" b="14179">that</wd>

<space/>

<wd l="3782" t="14074" r="4037" b="14179">we</wd>

<space/>

<wd l="4094" t="14021" r="4507" b="14179">need</wd>

<space/>

<wd l="4560" t="14045" r="4728" b="14179">to</wd>

<space/>

<wd l="4786" t="14021" r="5520" b="14179">inserted.</wd>

</ln>

</para>

<para l="1440" t="14294" r="5803" b="15307" alignment="justified" spaceBefore="1" spaceAfter="19" fli="216" lsp="exactly" lspExact="271" language="en">

<ln l="1656" t="14294" r="5798" b="14453" baseLine="14443" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="1656" t="14299" r="1814" b="14448">A</wd>

<space/>

<wd l="1906" t="14299" r="2314" b="14453">CRF</wd>

<space/>

<wd l="2395" t="14294" r="2938" b="14453">model</wd>

<space/>

<wd l="3029" t="14294" r="3418" b="14453">with</wd>

<space/>

<wd l="3499" t="14294" r="3768" b="14453">the</wd>

<space/>

<wd l="3854" t="14294" r="4363" b="14453">above</wd>

<space/>

<wd l="4450" t="14294" r="5054" b="14453">feature</wd>

<space/>

<wd l="5150" t="14318" r="5386" b="14453">set</wd>

<space/>

<wd l="5467" t="14347" r="5798" b="14453">was</wd>

<space/>

</ln>

<ln l="1440" t="14563" r="5794" b="14765" baseLine="14717" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="1440" t="14563" r="2050" b="14722">trained</wd>

<space/>

<wd l="2088" t="14563" r="2558" b="14765">using</wd>

<space/>

<wd l="2602" t="14568" r="3346" b="14722">L-BFGS</wd>

<space/>

<wd l="3398" t="14563" r="4070" b="14765">training</wd>

<space/>

<wd l="4118" t="14563" r="4776" b="14722">method</wd>

<space/>

<wd l="4819" t="14563" r="5131" b="14722">and</wd>

<space/>

<wd l="5174" t="14568" r="5390" b="14717">L1</wd>

<space/>

<wd l="5458" t="14616" r="5794" b="14765">reg-</wd>

</ln>

<ln l="1440" t="14837" r="5803" b="15038" baseLine="14986" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="1440" t="14837" r="2376" b="14995">ularization</wd>

<space/>

<wd l="2438" t="14837" r="2909" b="15038">using</wd>

<space/>

<wd l="2986" t="14837" r="3806" b="14995">CRFsuite</wd>

<space/>

<wd l="3883" t="14837" r="4714" b="15034">(Okazaki,</wd>

<space/>

<wd l="4800" t="14842" r="5347" b="15034">2007).</wd>

<space/>

<wd l="5467" t="14837" r="5803" b="14995">The</wd>

<space/>

</ln>

<ln l="1445" t="15106" r="5798" b="15307" baseLine="15259" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="1445" t="15106" r="2035" b="15264">overall</wd>

<space/>

<wd l="2131" t="15106" r="2616" b="15264">token</wd>

<space/>

<wd l="2707" t="15158" r="3475" b="15307">accuracy</wd>

<space/>

<wd l="3566" t="15106" r="3874" b="15264">this</wd>

<space/>

<wd l="3974" t="15106" r="4517" b="15264">model</wd>

<space/>

<wd l="4618" t="15106" r="5381" b="15264">achieved</wd>

<space/>

<wd l="5467" t="15158" r="5798" b="15264">was</wd>

</ln>

</para>

</column>

<column l="6141" t="1248" r="10523" b="15346">

<table l="6141" t="1248" r="10523" b="3449" alignment="left" spaceAfter="144">

<gridTable>

<gridCol>1572</gridCol>

<gridCol>1051</gridCol>

<gridCol>836</gridCol>

<gridCol>923</gridCol>

<gridRow>283</gridRow>

<gridRow>250</gridRow>

<gridRow>274</gridRow>

<gridRow>288</gridRow>

<gridRow>268</gridRow>

<gridRow>269</gridRow>

<gridRow>283</gridRow>

<gridRow>286</gridRow>

</gridTable>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="top">

<rightBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="6141" t="1248" r="7713" b="1531" language="en">

<ln l="0" t="0" r="0" b="0" baseLine="0" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable">

<nl orig="true"/>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="7834" t="1291" r="8645" b="1450" alignment="centered" spaceAfter="23" lsp="exactly" lspExact="255" language="en">

<ln l="7834" t="1291" r="8645" b="1450" baseLine="1445" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="7834" t="1291" r="8645" b="1450">Precision</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<bottomBorder type="single" width="5"/>

<para l="8904" t="1291" r="9461" b="1450" alignment="centered" spaceAfter="23" lsp="exactly" lspExact="255" language="en">

<ln l="8904" t="1291" r="9461" b="1450" baseLine="1445" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="8904" t="1291" r="9461" b="1450">Recall</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<bottomBorder type="single" width="5"/>

<para l="9725" t="1296" r="10378" b="1450" alignment="centered" spaceAfter="23" lsp="exactly" lspExact="255" language="en">

<ln l="9725" t="1296" r="10378" b="1450" baseLine="1445" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="9725" t="1296" r="10378" b="1450">F-score</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="5"/>

<rightBorder type="single" width="5"/>

<para l="6283" t="1565" r="7589" b="1771" alignment="left" li="137" lsp="exactly" lspExact="240" language="en">

<ln l="6283" t="1565" r="7589" b="1771" baseLine="1723" italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="10">

<wd l="6283" t="1565" r="7589" b="1771">MissingApos</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<para l="7944" t="1570" r="8534" b="1728" alignment="left" lsp="exactly" lspExact="240" language="en">

<tabs position="7944"/>

<ln l="7944" t="1570" r="8534" b="1728" baseLine="1723" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="7944" t="1570" r="8534" b="1728">0.9686</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="middle">

<topBorder type="single" width="5"/>

<para l="8890" t="1574" r="9480" b="1728" alignment="left" lsp="exactly" lspExact="240" language="en">

<tabs position="8890"/>

<ln l="8890" t="1574" r="9480" b="1728" baseLine="1723" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="8890" t="1574" r="9480" b="1728">0.9744</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="middle">

<topBorder type="single" width="5"/>

<para l="9754" t="1570" r="10339" b="1728" alignment="left" lsp="exactly" lspExact="240" language="en">

<tabs position="9754"/>

<ln l="9754" t="1570" r="10339" b="1728" baseLine="1723" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="9754" t="1570" r="10339" b="1728">0.9715</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<rightBorder type="single" width="5"/>

<para l="6283" t="1838" r="7493" b="2045" alignment="left" li="137" lsp="exactly" lspExact="256" language="en">

<ln l="6283" t="1838" r="7493" b="2045" baseLine="1992" italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="17">

<wd l="6283" t="1838" r="7493" b="2045">MissingWS</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<para l="7944" t="1843" r="8525" b="2002" alignment="left" lsp="exactly" lspExact="263" language="en">

<tabs position="7944"/>

<ln l="7944" t="1843" r="8525" b="2002" baseLine="1992" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="7944" t="1843" r="8525" b="2002">0.8795</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="middle">

<para l="8890" t="1843" r="9480" b="2002" alignment="left" lsp="exactly" lspExact="263" language="en">

<tabs position="8890"/>

<ln l="8890" t="1843" r="9480" b="2002" baseLine="1992" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="8890" t="1843" r="9480" b="2002">0.5794</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="middle">

<para l="9754" t="1843" r="10349" b="2002" alignment="left" lsp="exactly" lspExact="263" language="en">

<tabs position="9754"/>

<ln l="9754" t="1843" r="10349" b="2002" baseLine="1992" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="9754" t="1843" r="10349" b="2002">0.6986</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<rightBorder type="single" width="5"/>

<para l="6298" t="2117" r="7166" b="2328" alignment="left" li="137" lsp="exactly" lspExact="254" language="en">

<ln l="6298" t="2117" r="7166" b="2328" baseLine="2284">

<wd l="6298" t="2117" r="7166" b="2328"><run italic="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="10">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="10">to</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="10">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="10">ED</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="600" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="10">≤</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="750" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="10">2</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="750" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="10"><nl orig="true"/>

</run>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<para l="7944" t="2117" r="8530" b="2270" alignment="left" spaceAfter="8" lsp="exactly" lspExact="269" language="en">

<tabs position="7944"/>

<ln l="7944" t="2117" r="8530" b="2270" baseLine="2266" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="7944" t="2117" r="8530" b="2270">0.9078</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="middle">

<para l="8890" t="2112" r="9480" b="2270" alignment="left" spaceAfter="8" lsp="exactly" lspExact="269" language="en">

<tabs position="8890"/>

<ln l="8890" t="2112" r="9480" b="2270" baseLine="2266" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="8890" t="2112" r="9480" b="2270">0.8504</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="middle">

<para l="9754" t="2117" r="10349" b="2270" alignment="left" spaceAfter="8" lsp="exactly" lspExact="269" language="en">

<tabs position="9754"/>

<ln l="9754" t="2117" r="10349" b="2270" baseLine="2266" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="9754" t="2117" r="10349" b="2270">0.8782</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="middle">

<rightBorder type="single" width="5"/>

<para l="6298" t="2390" r="7171" b="2602" alignment="left" li="137" lsp="exactly" lspExact="259" language="en">

<ln l="6298" t="2390" r="7171" b="2602" baseLine="2552">

<wd l="6298" t="2390" r="7171" b="2602"><run italic="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="10">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="10">to</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="10">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="10">ED</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="600" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="10">≥</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="750" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="10">3</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="750" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="10"><nl orig="true"/>

</run>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<para l="7944" t="2386" r="8525" b="2544" alignment="left" spaceAfter="14" lsp="exactly" lspExact="249" language="en">

<tabs position="7944"/>

<ln l="7944" t="2386" r="8525" b="2544" baseLine="2534" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="7944" t="2386" r="8525" b="2544">0.9593</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="middle">

<para l="8890" t="2386" r="9480" b="2544" alignment="left" spaceAfter="14" lsp="exactly" lspExact="249" language="en">

<tabs position="8890"/>

<ln l="8890" t="2386" r="9480" b="2544" baseLine="2534" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="8890" t="2386" r="9480" b="2544">0.6852</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="middle">

<para l="9754" t="2390" r="10349" b="2544" alignment="left" spaceAfter="14" lsp="exactly" lspExact="249" language="en">

<tabs position="9754"/>

<ln l="9754" t="2390" r="10349" b="2544" baseLine="2534" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="9754" t="2390" r="10349" b="2544">0.7994</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="5" gridRowTill="5" alignment="left" verticalAlignment="middle">

<rightBorder type="single" width="5"/>

<para l="6298" t="2654" r="7176" b="2842" alignment="left" li="137" lsp="exactly" lspExact="262" language="en">

<ln l="6298" t="2654" r="7176" b="2842" baseLine="2825">

<wd l="6298" t="2654" r="7176" b="2842"><run italic="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="11">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="11">toM</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="11">ABB</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="11"><nl orig="true"/>

</run>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="5" gridRowTill="5" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<para l="7944" t="2654" r="8534" b="2813" alignment="left" spaceAfter="14" lsp="exactly" lspExact="255" language="en">

<tabs position="7944"/>

<ln l="7944" t="2654" r="8534" b="2813" baseLine="2808" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="7944" t="2654" r="8534" b="2813">0.9624</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="5" gridRowTill="5" alignment="decimal" verticalAlignment="middle">

<para l="8890" t="2659" r="9480" b="2813" alignment="left" spaceAfter="14" lsp="exactly" lspExact="255" language="en">

<tabs position="8890"/>

<ln l="8890" t="2659" r="9480" b="2813" baseLine="2808" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="8890" t="2659" r="9480" b="2813">0.8942</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="5" gridRowTill="5" alignment="decimal" verticalAlignment="middle">

<para l="9754" t="2659" r="10330" b="2813" alignment="left" spaceAfter="14" lsp="exactly" lspExact="255" language="en">

<tabs position="9754"/>

<ln l="9754" t="2659" r="10330" b="2813" baseLine="2808" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="9754" t="2659" r="10330" b="2813">0.9271</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="6" gridRowTill="6" alignment="left" verticalAlignment="middle">

<rightBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="6288" t="2923" r="6442" b="3086" alignment="left" li="137" spaceAfter="17" lsp="exactly" lspExact="256" language="en">

<ln l="6288" t="2923" r="6442" b="3086" baseLine="3077" italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6288" t="2923" r="6442" b="3086">O</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="6" gridRowTill="6" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="7944" t="2933" r="8534" b="3086" alignment="left" spaceAfter="18" lsp="exactly" lspExact="255" language="en">

<tabs position="7944"/>

<ln l="7944" t="2933" r="8534" b="3086" baseLine="3077" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="7944" t="2933" r="8534" b="3086">0.9874</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="6" gridRowTill="6" alignment="decimal" verticalAlignment="middle">

<bottomBorder type="single" width="5"/>

<para l="8890" t="2928" r="9475" b="3086" alignment="left" spaceAfter="18" lsp="exactly" lspExact="255" language="en">

<tabs position="8890"/>

<ln l="8890" t="2928" r="9475" b="3086" baseLine="3077" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="8890" t="2928" r="9475" b="3086">0.9959</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="6" gridRowTill="6" alignment="decimal" verticalAlignment="middle">

<bottomBorder type="single" width="5"/>

<para l="9754" t="2928" r="10349" b="3086" alignment="left" spaceAfter="18" lsp="exactly" lspExact="255" language="en">

<tabs position="9754"/>

<ln l="9754" t="2928" r="10349" b="3086" baseLine="3077" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="9754" t="2928" r="10349" b="3086">0.9916</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="7" gridRowTill="7" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="5"/>

<rightBorder type="single" width="5"/>

<para l="6278" t="3254" r="7550" b="3408" alignment="left" li="137" spaceAfter="28" lsp="exactly" lspExact="250" language="en">

<ln l="6278" t="3254" r="7550" b="3408" baseLine="3355" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6278" t="3259" r="6821" b="3365">macro</wd>

<space/>

<wd l="6883" t="3259" r="7550" b="3408">average</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="7" gridRowTill="7" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<para l="7944" t="3211" r="8534" b="3365" alignment="left" spaceAfter="28" lsp="exactly" lspExact="250" language="en">

<tabs position="7944"/>

<ln l="7944" t="3211" r="8534" b="3365" baseLine="3355" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="7944" t="3211" r="8534" b="3365">0.9442</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="7" gridRowTill="7" alignment="decimal" verticalAlignment="middle">

<topBorder type="single" width="5"/>

<para l="8890" t="3211" r="9475" b="3365" alignment="left" spaceAfter="28" lsp="exactly" lspExact="250" language="en">

<tabs position="8890"/>

<ln l="8890" t="3211" r="9475" b="3365" baseLine="3355" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="8890" t="3211" r="9475" b="3365">0.8299</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="7" gridRowTill="7" alignment="decimal" verticalAlignment="middle">

<topBorder type="single" width="5"/>

<para l="9754" t="3211" r="10344" b="3365" alignment="left" spaceAfter="28" lsp="exactly" lspExact="250" language="en">

<tabs position="9754"/>

<ln l="9754" t="3211" r="10344" b="3365" baseLine="3355" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="9754" t="3211" r="10344" b="3365">0.8777</wd>

</ln>

</para>

</cell>

</table>

<para l="6144" t="3653" r="10502" b="4128" alignment="justified" spaceAfter="244" lsp="exactly" lspExact="270" language="en">

<ln l="6144" t="3653" r="10502" b="3854" baseLine="3806" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="3653" r="6624" b="3811">Table</wd>

<space/>

<wd l="6686" t="3658" r="6830" b="3811">2:</wd>

<space/>

<wd l="6917" t="3653" r="7550" b="3811">Results</wd>

<space/>

<wd l="7618" t="3653" r="7810" b="3811">of</wd>

<space/>

<wd l="7853" t="3653" r="8731" b="3854">predicting</wd>

<space/>

<wd l="8789" t="3653" r="9058" b="3811">the</wd>

<space/>

<wd l="9115" t="3653" r="9998" b="3811">correction</wd>

<space/>

<wd l="10051" t="3677" r="10502" b="3854">types</wd>

<space/>

</ln>

<ln l="6144" t="3926" r="8650" b="4128" baseLine="4075" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="3926" r="6403" b="4085">for</wd>

<space/>

<wd l="6456" t="3926" r="7013" b="4085">tokens</wd>

<space/>

<wd l="7080" t="3979" r="7296" b="4085">on</wd>

<space/>

<wd l="7349" t="3926" r="7613" b="4085">the</wd>

<space/>

<wd l="7670" t="3926" r="8347" b="4128">training</wd>

<space/>

<wd l="8414" t="3950" r="8650" b="4085">set</wd>

</ln>

</para>

<table l="6141" t="4387" r="10523" b="6589" alignment="left" spaceAfter="144">

<gridTable>

<gridCol>1572</gridCol>

<gridCol>1051</gridCol>

<gridCol>836</gridCol>

<gridCol>923</gridCol>

<gridRow>283</gridRow>

<gridRow>250</gridRow>

<gridRow>274</gridRow>

<gridRow>288</gridRow>

<gridRow>273</gridRow>

<gridRow>259</gridRow>

<gridRow>288</gridRow>

<gridRow>287</gridRow>

</gridTable>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="top">

<rightBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="6141" t="4387" r="7713" b="4670" language="en">

<ln l="0" t="0" r="0" b="0" baseLine="0" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable">

<nl orig="true"/>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="7834" t="4430" r="8645" b="4589" alignment="centered" spaceAfter="28" lsp="exactly" lspExact="250" language="en">

<ln l="7834" t="4430" r="8645" b="4589" baseLine="4579" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="7834" t="4430" r="8645" b="4589">Precision</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<bottomBorder type="single" width="5"/>

<para l="8904" t="4430" r="9461" b="4589" alignment="centered" spaceAfter="28" lsp="exactly" lspExact="250" language="en">

<ln l="8904" t="4430" r="9461" b="4589" baseLine="4579" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="8904" t="4430" r="9461" b="4589">Recall</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<bottomBorder type="single" width="5"/>

<para l="9725" t="4435" r="10378" b="4589" alignment="centered" spaceAfter="28" lsp="exactly" lspExact="250" language="en">

<ln l="9725" t="4435" r="10378" b="4589" baseLine="4579" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="9725" t="4435" r="10378" b="4589">F-score</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="5"/>

<rightBorder type="single" width="5"/>

<para l="6283" t="4704" r="7589" b="4910" alignment="left" li="137" lsp="exactly" lspExact="240" language="en">

<ln l="6283" t="4704" r="7589" b="4910" baseLine="4862" italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="10">

<wd l="6283" t="4704" r="7589" b="4910">MissingApos</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<para l="7944" t="4709" r="8525" b="4867" alignment="left" lsp="exactly" lspExact="240" language="en">

<tabs position="7944"/>

<ln l="7944" t="4709" r="8525" b="4867" baseLine="4862" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="7944" t="4709" r="8525" b="4867">0.9755</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="middle">

<topBorder type="single" width="5"/>

<para l="8890" t="4714" r="9480" b="4867" alignment="left" lsp="exactly" lspExact="240" language="en">

<tabs position="8890"/>

<ln l="8890" t="4714" r="9480" b="4867" baseLine="4862" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="8890" t="4714" r="9480" b="4867">0.9702</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="middle">

<topBorder type="single" width="5"/>

<para l="9754" t="4714" r="10344" b="4867" alignment="left" lsp="exactly" lspExact="240" language="en">

<tabs position="9754"/>

<ln l="9754" t="4714" r="10344" b="4867" baseLine="4862" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="9754" t="4714" r="10344" b="4867">0.9728</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<rightBorder type="single" width="5"/>

<para l="6283" t="4978" r="7493" b="5184" alignment="left" li="137" lsp="exactly" lspExact="257" language="en">

<ln l="6283" t="4978" r="7493" b="5184" baseLine="5131" italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="17">

<wd l="6283" t="4978" r="7493" b="5184">MissingWS</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<para l="7944" t="4982" r="8534" b="5141" alignment="left" lsp="exactly" lspExact="264" language="en">

<tabs position="7944"/>

<ln l="7944" t="4982" r="8534" b="5141" baseLine="5131" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="7944" t="4982" r="8534" b="5141">0.7674</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="middle">

<para l="8890" t="4987" r="9480" b="5141" alignment="left" lsp="exactly" lspExact="264" language="en">

<tabs position="8890"/>

<ln l="8890" t="4987" r="9480" b="5141" baseLine="5131" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="8890" t="4987" r="9480" b="5141">0.4342</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="middle">

<para l="9754" t="4982" r="10349" b="5141" alignment="left" lsp="exactly" lspExact="264" language="en">

<tabs position="9754"/>

<ln l="9754" t="4982" r="10349" b="5141" baseLine="5131" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="9754" t="4982" r="10349" b="5141">0.5546</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<rightBorder type="single" width="5"/>

<para l="6298" t="5256" r="7166" b="5467" alignment="left" li="137" lsp="exactly" lspExact="259" language="en">

<ln l="6298" t="5256" r="7166" b="5467" baseLine="5418">

<wd l="6298" t="5256" r="7166" b="5467"><run italic="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="10">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="10">to</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="10">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="10">ED</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="600" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="10">≤</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="750" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="10">2</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="750" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="10"><nl orig="true"/>

</run>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<para l="7944" t="5251" r="8530" b="5410" alignment="left" spaceAfter="14" lsp="exactly" lspExact="264" language="en">

<tabs position="7944"/>

<ln l="7944" t="5251" r="8530" b="5410" baseLine="5400" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="7944" t="5251" r="8530" b="5410">0.8619</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="middle">

<para l="8890" t="5251" r="9480" b="5410" alignment="left" spaceAfter="14" lsp="exactly" lspExact="264" language="en">

<tabs position="8890"/>

<ln l="8890" t="5251" r="9480" b="5410" baseLine="5400" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="8890" t="5251" r="9480" b="5410">0.7950</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="middle">

<para l="9754" t="5256" r="10330" b="5410" alignment="left" spaceAfter="14" lsp="exactly" lspExact="264" language="en">

<tabs position="9754"/>

<ln l="9754" t="5256" r="10330" b="5410" baseLine="5400" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="9754" t="5256" r="10330" b="5410">0.8271</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="middle">

<rightBorder type="single" width="5"/>

<para l="6298" t="5530" r="7171" b="5741" alignment="left" li="137" lsp="exactly" lspExact="259" language="en">

<ln l="6298" t="5530" r="7171" b="5741" baseLine="5692">

<wd l="6298" t="5530" r="7171" b="5741"><run italic="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="10">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="10">to</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="10">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="10">ED</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="600" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="10">≥</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="750" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="10">3</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="750" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="10"><nl orig="true"/>

</run>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<para l="7944" t="5530" r="8525" b="5683" alignment="left" spaceAfter="13" lsp="exactly" lspExact="250" language="en">

<tabs position="7944"/>

<ln l="7944" t="5530" r="8525" b="5683" baseLine="5674" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="7944" t="5530" r="8525" b="5683">0.8793</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="middle">

<para l="8890" t="5525" r="9480" b="5683" alignment="left" spaceAfter="13" lsp="exactly" lspExact="250" language="en">

<tabs position="8890"/>

<ln l="8890" t="5525" r="9480" b="5683" baseLine="5674" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="8890" t="5525" r="9480" b="5683">0.5240</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="middle">

<para l="9754" t="5525" r="10344" b="5683" alignment="left" spaceAfter="13" lsp="exactly" lspExact="250" language="en">

<tabs position="9754"/>

<ln l="9754" t="5525" r="10344" b="5683" baseLine="5674" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="9754" t="5525" r="10344" b="5683">0.6567</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="5" gridRowTill="5" alignment="left" verticalAlignment="middle">

<rightBorder type="single" width="5"/>

<para l="6298" t="5794" r="7176" b="5981" alignment="left" li="137" lsp="exactly" lspExact="249" language="en">

<ln l="6298" t="5794" r="7176" b="5981" baseLine="5959">

<wd l="6298" t="5794" r="7176" b="5981"><run italic="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="11">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="11">toM</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="11">ABB</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="11"><nl orig="true"/>

</run>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="5" gridRowTill="5" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<para l="7944" t="5798" r="8530" b="5952" alignment="left" spaceAfter="4" lsp="exactly" lspExact="245" language="en">

<tabs position="7944"/>

<ln l="7944" t="5798" r="8530" b="5952" baseLine="5942" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="7944" t="5798" r="8530" b="5952">0.9449</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="5" gridRowTill="5" alignment="decimal" verticalAlignment="middle">

<para l="8890" t="5794" r="9475" b="5952" alignment="left" spaceAfter="4" lsp="exactly" lspExact="245" language="en">

<tabs position="8890"/>

<ln l="8890" t="5794" r="9475" b="5952" baseLine="5942" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="8890" t="5794" r="9475" b="5952">0.8659</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="5" gridRowTill="5" alignment="decimal" verticalAlignment="middle">

<para l="9754" t="5798" r="10344" b="5952" alignment="left" spaceAfter="4" lsp="exactly" lspExact="245" language="en">

<tabs position="9754"/>

<ln l="9754" t="5798" r="10344" b="5952" baseLine="5942" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="9754" t="5798" r="10344" b="5952">0.9037</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="6" gridRowTill="6" alignment="left" verticalAlignment="middle">

<rightBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="6288" t="6058" r="6442" b="6221" alignment="left" li="137" spaceAfter="17" lsp="exactly" lspExact="261" language="en">

<ln l="6288" t="6058" r="6442" b="6221" baseLine="6216" italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6288" t="6058" r="6442" b="6221">O</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="6" gridRowTill="6" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<bottomBorder type="single" width="5"/>

<para l="7944" t="6062" r="8534" b="6221" alignment="left" spaceAfter="18" lsp="exactly" lspExact="260" language="en">

<tabs position="7944"/>

<ln l="7944" t="6062" r="8534" b="6221" baseLine="6216" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="7944" t="6062" r="8534" b="6221">0.9816</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="6" gridRowTill="6" alignment="decimal" verticalAlignment="middle">

<bottomBorder type="single" width="5"/>

<para l="8890" t="6067" r="9480" b="6221" alignment="left" spaceAfter="18" lsp="exactly" lspExact="260" language="en">

<tabs position="8890"/>

<ln l="8890" t="6067" r="9480" b="6221" baseLine="6216" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="8890" t="6067" r="9480" b="6221">0.9932</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="6" gridRowTill="6" alignment="decimal" verticalAlignment="middle">

<bottomBorder type="single" width="5"/>

<para l="9754" t="6067" r="10349" b="6221" alignment="left" spaceAfter="18" lsp="exactly" lspExact="260" language="en">

<tabs position="9754"/>

<ln l="9754" t="6067" r="10349" b="6221" baseLine="6216" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="9754" t="6067" r="10349" b="6221">0.9874</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="7" gridRowTill="7" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="5"/>

<rightBorder type="single" width="5"/>

<para l="6278" t="6394" r="7550" b="6547" alignment="left" li="137" spaceAfter="28" lsp="exactly" lspExact="250" language="en">

<ln l="6278" t="6394" r="7550" b="6547" baseLine="6494" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6278" t="6398" r="6821" b="6504">macro</wd>

<space/>

<wd l="6883" t="6398" r="7550" b="6547">average</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="7" gridRowTill="7" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="5"/>

<topBorder type="single" width="5"/>

<para l="7944" t="6350" r="8530" b="6504" alignment="left" spaceAfter="28" lsp="exactly" lspExact="250" language="en">

<tabs position="7944"/>

<ln l="7944" t="6350" r="8530" b="6504" baseLine="6494" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="7944" t="6350" r="8530" b="6504">0.9018</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="7" gridRowTill="7" alignment="decimal" verticalAlignment="middle">

<topBorder type="single" width="5"/>

<para l="8890" t="6346" r="9475" b="6504" alignment="left" spaceAfter="28" lsp="exactly" lspExact="250" language="en">

<tabs position="8890"/>

<ln l="8890" t="6346" r="9475" b="6504" baseLine="6494" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="8890" t="6346" r="9475" b="6504">0.7638</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="7" gridRowTill="7" alignment="decimal" verticalAlignment="middle">

<topBorder type="single" width="5"/>

<para l="9754" t="6350" r="10330" b="6504" alignment="left" spaceAfter="28" lsp="exactly" lspExact="250" language="en">

<tabs position="9754"/>

<ln l="9754" t="6350" r="10330" b="6504" baseLine="6494" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="9754" t="6350" r="10330" b="6504">0.8171</wd>

</ln>

</para>

</cell>

</table>

<para l="6144" t="6792" r="10502" b="7224" alignment="justified" lsp="exactly" lspExact="271" language="en">

<ln l="6144" t="6792" r="10502" b="6994" baseLine="6946" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="6792" r="6624" b="6950">Table</wd>

<space/>

<wd l="6686" t="6797" r="6830" b="6950">3:</wd>

<space/>

<wd l="6917" t="6792" r="7550" b="6950">Results</wd>

<space/>

<wd l="7618" t="6792" r="7810" b="6950">of</wd>

<space/>

<wd l="7853" t="6792" r="8731" b="6994">predicting</wd>

<space/>

<wd l="8789" t="6792" r="9058" b="6950">the</wd>

<space/>

<wd l="9115" t="6792" r="9998" b="6950">correction</wd>

<space/>

<wd l="10051" t="6816" r="10502" b="6994">types</wd>

<space/>

</ln>

<ln l="6144" t="7066" r="8270" b="7224" baseLine="7214" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="7066" r="6403" b="7224">for</wd>

<space/>

<wd l="6456" t="7066" r="7013" b="7224">tokens</wd>

<space/>

<wd l="7080" t="7118" r="7296" b="7224">on</wd>

<space/>

<wd l="7349" t="7066" r="7613" b="7224">the</wd>

<space/>

<wd l="7670" t="7090" r="7978" b="7224">test</wd>

<space/>

<wd l="8040" t="7090" r="8270" b="7224">set</wd>

</ln>

</para>

<para l="6144" t="7714" r="10517" b="11122" alignment="justified" spaceBefore="380" lsp="exactly" lspExact="271" language="en">

<ln l="6149" t="7714" r="10502" b="7915" baseLine="7862" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="7718" r="6744" b="7872">0.9830</wd>

<space/>

<wd l="6835" t="7714" r="7147" b="7872">and</wd>

<space/>

<wd l="7238" t="7714" r="7829" b="7872">0.9746</wd>

<space/>

<wd l="7925" t="7714" r="8237" b="7872">and</wd>

<space/>

<wd l="8323" t="7714" r="8587" b="7872">the</wd>

<space/>

<wd l="8678" t="7714" r="9600" b="7915">proportion</wd>

<space/>

<wd l="9691" t="7714" r="9878" b="7872">of</wd>

<space/>

<wd l="9955" t="7738" r="10502" b="7872">tweets</wd>

<space/>

</ln>

<ln l="6144" t="7982" r="10502" b="8184" baseLine="8136" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="7982" r="6403" b="8141">for</wd>

<space/>

<wd l="6456" t="7982" r="6989" b="8141">which</wd>

<space/>

<wd l="7051" t="7982" r="7253" b="8141">all</wd>

<space/>

<wd l="7320" t="7982" r="7584" b="8141">the</wd>

<space/>

<wd l="7642" t="7982" r="8198" b="8141">tokens</wd>

<space/>

<wd l="8261" t="8035" r="8688" b="8141">were</wd>

<space/>

<wd l="8741" t="7982" r="9326" b="8184">tagged</wd>

<space/>

<wd l="9379" t="7982" r="10114" b="8184">properly</wd>

<space/>

<wd l="10176" t="8035" r="10502" b="8141">was</wd>

<space/>

</ln>

<ln l="6149" t="8256" r="10498" b="8458" baseLine="8405" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="8261" r="6744" b="8414">0.7902</wd>

<space/>

<wd l="6830" t="8256" r="7142" b="8414">and</wd>

<space/>

<wd l="7229" t="8261" r="7810" b="8414">0.7143</wd>

<space/>

<wd l="7906" t="8256" r="8165" b="8414">for</wd>

<space/>

<wd l="8242" t="8256" r="8506" b="8414">the</wd>

<space/>

<wd l="8587" t="8256" r="9264" b="8458">training</wd>

<space/>

<wd l="9355" t="8256" r="9662" b="8414">and</wd>

<space/>

<wd l="9744" t="8280" r="10051" b="8414">test</wd>

<space/>

<wd l="10138" t="8280" r="10498" b="8443">sets,</wd>

<space/>

</ln>

<ln l="6144" t="8525" r="10512" b="8726" baseLine="8678" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="8525" r="7219" b="8726">respectively.</wd>

<space/>

<wd l="7387" t="8530" r="7546" b="8678">A</wd>

<space/>

<wd l="7627" t="8578" r="8074" b="8683">more</wd>

<space/>

<wd l="8165" t="8525" r="8851" b="8683">detailed</wd>

<space/>

<wd l="8933" t="8525" r="9898" b="8683">breakdown</wd>

<space/>

<wd l="9984" t="8525" r="10176" b="8683">of</wd>

<space/>

<wd l="10243" t="8525" r="10512" b="8683">the</wd>

<space/>

</ln>

<ln l="6149" t="8798" r="10507" b="9000" baseLine="8947" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="8798" r="7286" b="8957">classification</wd>

<space/>

<wd l="7334" t="8798" r="8515" b="9000">performances</wd>

<space/>

<wd l="8573" t="8798" r="8765" b="8957">of</wd>

<space/>

<wd l="8803" t="8798" r="9067" b="8957">the</wd>

<space/>

<wd l="9125" t="8851" r="9917" b="9000">sequence</wd>

<space/>

<wd l="9965" t="8798" r="10507" b="8957">model</wd>

<space/>

</ln>

<ln l="6149" t="9067" r="10502" b="9269" baseLine="9216" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="9120" r="6365" b="9226">on</wd>

<space/>

<wd l="6451" t="9067" r="6715" b="9226">the</wd>

<space/>

<wd l="6806" t="9067" r="7478" b="9269">training</wd>

<space/>

<wd l="7574" t="9067" r="7886" b="9226">and</wd>

<space/>

<wd l="7973" t="9091" r="8275" b="9226">test</wd>

<space/>

<wd l="8371" t="9091" r="8683" b="9226">sets</wd>

<space/>

<wd l="8784" t="9120" r="9043" b="9226">are</wd>

<space/>

<wd l="9130" t="9067" r="9883" b="9226">included</wd>

<space/>

<wd l="9970" t="9067" r="10138" b="9221">in</wd>

<space/>

<wd l="10224" t="9072" r="10502" b="9226">Ta-</wd>

</ln>

<ln l="6144" t="9341" r="10512" b="9499" baseLine="9490" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="9341" r="6413" b="9499">ble</wd>

<space/>

<wd l="6504" t="9346" r="6605" b="9494">2</wd>

<space/>

<wd l="6706" t="9341" r="7018" b="9499">and</wd>

<space/>

<wd l="7104" t="9341" r="7584" b="9499">Table</wd>

<space/>

<wd l="7675" t="9346" r="7824" b="9499">3.</wd>

<space/>

<wd l="8006" t="9341" r="8525" b="9499">These</wd>

<space/>

<wd l="8616" t="9341" r="9120" b="9499">tables</wd>

<space/>

<wd l="9216" t="9341" r="9734" b="9499">reveal</wd>

<space/>

<wd l="9830" t="9341" r="10157" b="9499">that</wd>

<space/>

<wd l="10243" t="9341" r="10512" b="9499">the</wd>

<space/>

</ln>

<ln l="6144" t="9610" r="10512" b="9811" baseLine="9758" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="9634" r="6571" b="9768">most</wd>

<space/>

<wd l="6662" t="9610" r="7344" b="9768">difficult</wd>

<space/>

<wd l="7430" t="9662" r="7858" b="9768">error</wd>

<space/>

<wd l="7939" t="9634" r="8314" b="9811">type</wd>

<space/>

<wd l="8400" t="9634" r="8568" b="9768">to</wd>

<space/>

<wd l="8654" t="9610" r="9331" b="9811">identify</wd>

<space/>

<wd l="9422" t="9662" r="9749" b="9768">was</wd>

<space/>

<wd l="9845" t="9610" r="10109" b="9768">the</wd>

<space/>

<wd l="10200" t="9662" r="10512" b="9768">one</wd>

<space/>

</ln>

<ln l="6144" t="9878" r="10502" b="10080" baseLine="10032" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="9878" r="6677" b="10037">where</wd>

<space/>

<wd l="6744" t="9931" r="6840" b="10037">a</wd>

<space/>

<wd l="6898" t="9878" r="7344" b="10037">word</wd>

<space/>

<wd l="7406" t="9878" r="8011" b="10037">missed</wd>

<space/>

<wd l="8083" t="9931" r="8534" b="10037">some</wd>

<space/>

<wd l="8597" t="9878" r="9566" b="10080">whitespace</wd>

<space/>

<wd l="9634" t="9878" r="10502" b="10037">characters</wd>

<space/>

</ln>

<ln l="6154" t="10147" r="10512" b="10354" baseLine="10301">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6154" t="10157" r="6547" b="10349">(row</wd>

<space/>

</run>

<wd l="6619" t="10147" r="7944" b="10354"><run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">MissingWS</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">).</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="8040" t="10152" r="8419" b="10310">This</wd>

<space/>

<wd l="8491" t="10152" r="8904" b="10310">class</wd>

<space/>

<wd l="8971" t="10152" r="9682" b="10354">happens</wd>

<space/>

<wd l="9749" t="10176" r="9912" b="10310">to</wd>

<space/>

<wd l="9979" t="10152" r="10186" b="10310">be</wd>

<space/>

<wd l="10243" t="10152" r="10512" b="10310">the</wd>

<space/>

</run>

</ln>

<ln l="6144" t="10421" r="10502" b="10622" baseLine="10574" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="10421" r="6547" b="10579">least</wd>

<space/>

<wd l="6614" t="10421" r="7344" b="10622">frequent</wd>

<space/>

<wd l="7411" t="10421" r="7723" b="10579">and</wd>

<space/>

<wd l="7795" t="10474" r="8107" b="10579">one</wd>

<space/>

<wd l="8179" t="10421" r="8371" b="10579">of</wd>

<space/>

<wd l="8424" t="10421" r="8688" b="10579">the</wd>

<space/>

<wd l="8760" t="10445" r="9182" b="10579">most</wd>

<space/>

<wd l="9250" t="10421" r="10502" b="10622">heterogeneous</wd>

<space/>

</ln>

<ln l="6149" t="10694" r="10517" b="10896" baseLine="10843" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="10694" r="6562" b="10853">class</wd>

<space/>

<wd l="6648" t="10747" r="6821" b="10853">as</wd>

<space/>

<wd l="6902" t="10694" r="7320" b="10882">well,</wd>

<space/>

<wd l="7411" t="10694" r="7949" b="10853">which</wd>

<space/>

<wd l="8021" t="10694" r="8530" b="10896">might</wd>

<space/>

<wd l="8606" t="10694" r="8808" b="10853">be</wd>

<space/>

<wd l="8890" t="10747" r="9086" b="10853">an</wd>

<space/>

<wd l="9173" t="10694" r="10181" b="10896">explanation</wd>

<space/>

<wd l="10258" t="10694" r="10517" b="10853">for</wd>

<space/>

</ln>

<ln l="6144" t="10963" r="8760" b="11122" baseLine="11117" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="10963" r="6413" b="11122">the</wd>

<space/>

<wd l="6466" t="10963" r="6960" b="11122">lower</wd>

<space/>

<wd l="7013" t="10963" r="7574" b="11122">results</wd>

<space/>

<wd l="7642" t="11016" r="7858" b="11122">on</wd>

<space/>

<wd l="7910" t="10963" r="8237" b="11122">that</wd>

<space/>

<wd l="8294" t="10963" r="8760" b="11122">class.</wd>

</ln>

</para>

<para l="6144" t="11462" r="9014" b="11664" alignment="left" spaceBefore="244" lsp="exactly" lspExact="254" language="en">

<ln l="6144" t="11462" r="9014" b="11664" baseLine="11611" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="4">

<wd l="6144" t="11462" r="6413" b="11621">4.2</wd>

<space/>

<wd l="6638" t="11462" r="7704" b="11664">Augmented</wd>

<space/>

<wd l="7757" t="11462" r="8165" b="11621">Edit</wd>

<space/>

<wd l="8213" t="11462" r="9014" b="11621">Distance</wd>

</ln>

</para>

<para l="6144" t="11842" r="10512" b="14482" alignment="justified" spaceBefore="104" lsp="exactly" lspExact="271" language="en">

<ln l="6144" t="11842" r="10502" b="12043" baseLine="11990" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6144" t="11842" r="6667" b="12000">When</wd>

<space/>

<wd l="6768" t="11842" r="7814" b="12043">determining</wd>

<space/>

<wd l="7920" t="11894" r="8016" b="12000">a</wd>

<space/>

<wd l="8117" t="11866" r="8352" b="12000">set</wd>

<space/>

<wd l="8448" t="11842" r="8640" b="12000">of</wd>

<space/>

<wd l="8726" t="11842" r="9557" b="12000">candidate</wd>

<space/>

<wd l="9653" t="11846" r="9878" b="12000">IV</wd>

<space/>

<wd l="9979" t="11842" r="10502" b="12000">words</wd>

<space/>

</ln>

<ln l="6144" t="12110" r="10502" b="12312" baseLine="12264" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6144" t="12110" r="6475" b="12269">that</wd>

<space/>

<wd l="6552" t="12163" r="6749" b="12269">an</wd>

<space/>

<wd l="6835" t="12115" r="7286" b="12269">OOV</wd>

<space/>

<wd l="7368" t="12110" r="7877" b="12312">might</wd>

<space/>

<wd l="7954" t="12110" r="8155" b="12269">be</wd>

<space/>

<wd l="8232" t="12110" r="9019" b="12269">rewritten</wd>

<space/>

<wd l="9091" t="12110" r="9379" b="12298">for,</wd>

<space/>

<wd l="9470" t="12110" r="9595" b="12269">it</wd>

<space/>

<wd l="9667" t="12110" r="9806" b="12269">is</wd>

<space/>

<wd l="9893" t="12163" r="9989" b="12269">a</wd>

<space/>

<wd l="10066" t="12163" r="10502" b="12269">com-</wd>

</ln>

<ln l="6144" t="12384" r="10512" b="12586" baseLine="12533" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6144" t="12437" r="6533" b="12542">mon</wd>

<space/>

<wd l="6600" t="12384" r="7291" b="12586">practice</wd>

<space/>

<wd l="7358" t="12408" r="7522" b="12542">to</wd>

<space/>

<wd l="7594" t="12384" r="8054" b="12586">place</wd>

<space/>

<wd l="8126" t="12437" r="8323" b="12542">an</wd>

<space/>

<wd l="8390" t="12437" r="8894" b="12586">upper</wd>

<space/>

<wd l="8957" t="12384" r="9499" b="12542">bound</wd>

<space/>

<wd l="9571" t="12437" r="9787" b="12542">on</wd>

<space/>

<wd l="9850" t="12384" r="10118" b="12542">the</wd>

<space/>

<wd l="10190" t="12384" r="10512" b="12542">edit</wd>

<space/>

</ln>

<ln l="6149" t="12653" r="10507" b="12811" baseLine="12802" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="12653" r="6859" b="12811">distance</wd>

<space/>

<wd l="6926" t="12653" r="7656" b="12811">between</wd>

<space/>

<wd l="7718" t="12653" r="7987" b="12811">the</wd>

<space/>

<wd l="8054" t="12658" r="8280" b="12811">IV</wd>

<space/>

<wd l="8352" t="12653" r="9259" b="12811">candidates</wd>

<space/>

<wd l="9341" t="12653" r="9653" b="12811">and</wd>

<space/>

<wd l="9715" t="12653" r="9984" b="12811">the</wd>

<space/>

<wd l="10056" t="12658" r="10507" b="12811">OOV</wd>

<space/>

</ln>

<ln l="6144" t="12926" r="10512" b="13085" baseLine="13075" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6144" t="12926" r="6634" b="13085">word.</wd>

<space/>

<wd l="6768" t="12931" r="6950" b="13080">In</wd>

<space/>

<wd l="7027" t="12926" r="7488" b="13085">order</wd>

<space/>

<wd l="7555" t="12950" r="7723" b="13085">to</wd>

<space/>

<wd l="7795" t="12979" r="8525" b="13085">measure</wd>

<space/>

<wd l="8602" t="12926" r="8928" b="13085">edit</wd>

<space/>

<wd l="9000" t="12926" r="9710" b="13085">distance</wd>

<space/>

<wd l="9782" t="12926" r="10512" b="13085">between</wd>

<space/>

</ln>

<ln l="6144" t="13195" r="10512" b="13397" baseLine="13344" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6144" t="13195" r="6706" b="13354">tokens</wd>

<space/>

<wd l="6773" t="13195" r="7723" b="13397">originating</wd>

<space/>

<wd l="7781" t="13195" r="8208" b="13354">from</wd>

<space/>

<wd l="8261" t="13219" r="8808" b="13354">tweets</wd>

<space/>

<wd l="8880" t="13195" r="9192" b="13354">and</wd>

<space/>

<wd l="9245" t="13195" r="9648" b="13354">their</wd>

<space/>

<wd l="9706" t="13195" r="10512" b="13354">corrected</wd>

<space/>

</ln>

<ln l="6144" t="13464" r="10502" b="13666" baseLine="13618" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6144" t="13464" r="6696" b="13651">forms,</wd>

<space/>

<wd l="6758" t="13517" r="7013" b="13622">we</wd>

<space/>

<wd l="7061" t="13464" r="8203" b="13666">implemented</wd>

<space/>

<wd l="8251" t="13517" r="8347" b="13622">a</wd>

<space/>

<wd l="8390" t="13464" r="9494" b="13622">modification</wd>

<space/>

<wd l="9547" t="13464" r="9739" b="13622">of</wd>

<space/>

<wd l="9773" t="13464" r="10037" b="13622">the</wd>

<space/>

<wd l="10094" t="13488" r="10502" b="13622">stan-</wd>

</ln>

<ln l="6149" t="13738" r="10502" b="13939" baseLine="13886" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="13738" r="6533" b="13896">dard</wd>

<space/>

<wd l="6605" t="13738" r="6926" b="13896">edit</wd>

<space/>

<wd l="6994" t="13738" r="7704" b="13896">distance</wd>

<space/>

<wd l="7771" t="13738" r="8621" b="13939">algorithm</wd>

<space/>

<wd l="8683" t="13738" r="9010" b="13896">that</wd>

<space/>

<wd l="9072" t="13738" r="9211" b="13896">is</wd>

<space/>

<wd l="9288" t="13738" r="10152" b="13939">especially</wd>

<space/>

<wd l="10219" t="13738" r="10502" b="13896">tai-</wd>

</ln>

<ln l="6144" t="14006" r="10502" b="14208" baseLine="14160" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6144" t="14006" r="6595" b="14165">lored</wd>

<space/>

<wd l="6662" t="14030" r="6830" b="14165">to</wd>

<space/>

<wd l="6902" t="14006" r="7805" b="14208">measuring</wd>

<space/>

<wd l="7882" t="14006" r="8146" b="14165">the</wd>

<space/>

<wd l="8218" t="14006" r="9091" b="14165">difference</wd>

<space/>

<wd l="9168" t="14006" r="9360" b="14165">of</wd>

<space/>

<wd l="9418" t="14011" r="9869" b="14165">OOV</wd>

<space/>

<wd l="9946" t="14006" r="10502" b="14165">tokens</wd>

<space/>

</ln>

<ln l="6149" t="14280" r="9739" b="14482" baseLine="14429" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="14280" r="7099" b="14482">originating</wd>

<space/>

<wd l="7157" t="14280" r="7584" b="14438">from</wd>

<space/>

<wd l="7646" t="14280" r="8141" b="14438">social</wd>

<space/>

<wd l="8198" t="14280" r="8736" b="14438">media</wd>

<space/>

<wd l="8789" t="14304" r="8952" b="14438">to</wd>

<space/>

<wd l="9014" t="14285" r="9240" b="14438">IV</wd>

<space/>

<wd l="9302" t="14333" r="9739" b="14438">ones.</wd>

</ln>

</para>

<para l="6144" t="14563" r="10512" b="15293" alignment="justified" spaceBefore="17" spaceAfter="19" fli="216" lsp="exactly" lspExact="271" language="en">

<ln l="6365" t="14563" r="10507" b="14765" baseLine="14717" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6365" t="14563" r="6701" b="14722">The</wd>

<space/>

<wd l="6787" t="14563" r="7114" b="14722">edit</wd>

<space/>

<wd l="7200" t="14563" r="7906" b="14722">distance</wd>

<space/>

<wd l="7992" t="14616" r="8242" b="14722">we</wd>

<space/>

<wd l="8333" t="14563" r="9187" b="14765">employed</wd>

<space/>

<wd l="9264" t="14563" r="9403" b="14722">is</wd>

<space/>

<wd l="9499" t="14563" r="10507" b="14765">asymmetric</wd>

<space/>

</ln>

<ln l="6149" t="14837" r="10512" b="14995" baseLine="14986" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="14890" r="6322" b="14995">as</wd>

<space/>

<wd l="6398" t="14837" r="7238" b="14995">insertions</wd>

<space/>

<wd l="7325" t="14837" r="7512" b="14995">of</wd>

<space/>

<wd l="7574" t="14837" r="8448" b="14995">characters</wd>

<space/>

<wd l="8525" t="14837" r="8861" b="14995">into</wd>

<space/>

<wd l="8942" t="14842" r="9394" b="14995">OOV</wd>

<space/>

<wd l="9470" t="14837" r="10027" b="14995">tokens</wd>

<space/>

<wd l="10109" t="14837" r="10512" b="14995">have</wd>

<space/>

</ln>

<ln l="6144" t="15106" r="10502" b="15293" baseLine="15259">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6144" t="15158" r="6360" b="15264">no</wd>

<space/>

<wd l="6427" t="15130" r="6902" b="15264">costs.</wd>

<space/>

<wd l="6984" t="15110" r="7291" b="15264">For</wd>

<space/>

<wd l="7344" t="15106" r="8102" b="15293">instance,</wd>

<space/>

<wd l="8170" t="15106" r="8429" b="15264">for</wd>

<space/>

<wd l="8482" t="15106" r="8746" b="15264">the</wd>

<space/>

<wd l="8803" t="15106" r="9326" b="15264">words</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="9398" t="15139" r="9840" b="15264">tmrw</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="9902" t="15106" r="10214" b="15264">and</wd>

<space/>

</run>

<wd l="10272" t="15139" r="10502" b="15264" italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">to-</wd>

<run fontFace="Times New Roman" fontFamily="roman" fontPitch="variable"><nl orig="true"/>

</run>

</ln>

</para>

</column>

</section>

<dd l="1439" t="15736" r="10523" b="15977">

<para l="5771" t="15792" r="6200" b="15941" alignment="centered" spaceBefore="4" lsp="exactly" lspExact="229" language="en">

<ln l="5837" t="15792" r="6134" b="15941" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="25">

<wd l="5837" t="15792" r="6134" b="15941">122</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4318.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1440" marginTop="1225" marginRight="1385" marginBottom="1292" offsetX="-20" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1440" t="1225" r="10524" b="15386">

<column l="1440" t="1225" r="5822" b="15386">

<para l="1440" t="1320" r="5818" b="3691" alignment="justified" spaceBefore="31" lsp="exactly" lspExact="271" language="en">

<ln l="1440" t="1320" r="5803" b="1522" baseLine="1474">

<wd l="1440" t="1373" r="2165" b="1507"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">morrow</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="2261" t="1320" r="2530" b="1478">the</wd>

<space/>

<wd l="2611" t="1320" r="2938" b="1478">edit</wd>

<space/>

<wd l="3019" t="1320" r="3730" b="1478">distance</wd>

<space/>

<wd l="3811" t="1320" r="3950" b="1478">is</wd>

<space/>

<wd l="4037" t="1320" r="4795" b="1522">regarded</wd>

<space/>

<wd l="4882" t="1373" r="5050" b="1478">as</wd>

<space/>

<wd l="5141" t="1325" r="5242" b="1478">0</wd>

<space/>

<wd l="5328" t="1320" r="5472" b="1474">if</wd>

<space/>

<wd l="5539" t="1320" r="5803" b="1478">the</wd>

<space/>

</run>

</ln>

<ln l="1440" t="1594" r="5803" b="1752" baseLine="1742" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="1594" r="2040" b="1752">former</wd>

<space/>

<wd l="2112" t="1594" r="2246" b="1752">is</wd>

<space/>

<wd l="2338" t="1594" r="3278" b="1752">considered</wd>

<space/>

<wd l="3355" t="1618" r="3518" b="1752">to</wd>

<space/>

<wd l="3600" t="1594" r="3806" b="1752">be</wd>

<space/>

<wd l="3883" t="1594" r="4147" b="1752">the</wd>

<space/>

<wd l="4234" t="1594" r="5270" b="1752">substandard</wd>

<space/>

<wd l="5347" t="1598" r="5803" b="1752">OOV</wd>

<space/>

</ln>

<ln l="1440" t="1862" r="5794" b="2021" baseLine="2016" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="1862" r="1925" b="2021">token</wd>

<space/>

<wd l="2011" t="1862" r="2323" b="2021">and</wd>

<space/>

<wd l="2405" t="1862" r="2669" b="2021">the</wd>

<space/>

<wd l="2755" t="1862" r="3206" b="2021">latter</wd>

<space/>

<wd l="3293" t="1915" r="3600" b="2021">one</wd>

<space/>

<wd l="3691" t="1915" r="3859" b="2021">as</wd>

<space/>

<wd l="3950" t="1862" r="4214" b="2021">the</wd>

<space/>

<wd l="4310" t="1862" r="5040" b="2021">standard</wd>

<space/>

<wd l="5122" t="1867" r="5347" b="2021">IV</wd>

<space/>

<wd l="5438" t="1915" r="5794" b="2021">one.</wd>

<space/>

</ln>

<ln l="1440" t="2136" r="5798" b="2323" baseLine="2285" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="2141" r="1906" b="2323">Note,</wd>

<space/>

<wd l="1997" t="2136" r="2770" b="2323">however,</wd>

<space/>

<wd l="2861" t="2136" r="3010" b="2290">if</wd>

<space/>

<wd l="3072" t="2136" r="3336" b="2294">the</wd>

<space/>

<wd l="3413" t="2136" r="3749" b="2294">role</wd>

<space/>

<wd l="3830" t="2136" r="4022" b="2294">of</wd>

<space/>

<wd l="4085" t="2136" r="4349" b="2294">the</wd>

<space/>

<wd l="4426" t="2160" r="4747" b="2294">two</wd>

<space/>

<wd l="4824" t="2136" r="5386" b="2294">tokens</wd>

<space/>

<wd l="5467" t="2189" r="5798" b="2294">was</wd>

<space/>

</ln>

<ln l="1445" t="2405" r="5794" b="2606" baseLine="2558">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1445" t="2405" r="2170" b="2606">changed</wd>

<space/>

<wd l="2237" t="2405" r="2510" b="2602">(i.e</wd>

<space/>

<wd l="2568" t="2405" r="2717" b="2558">if</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="2765" t="2438" r="3206" b="2563">tmrw</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="3269" t="2458" r="3595" b="2563">was</wd>

<space/>

<wd l="3662" t="2405" r="4258" b="2563">treated</wd>

<space/>

<wd l="4320" t="2458" r="4488" b="2563">as</wd>

<space/>

<wd l="4555" t="2410" r="4781" b="2563">IV</wd>

<space/>

<wd l="4848" t="2405" r="5160" b="2563">and</wd>

<space/>

</run>

<wd l="5222" t="2438" r="5794" b="2563" italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">tomor-</wd>

</ln>

<ln l="1450" t="2678" r="5794" b="2875" baseLine="2827">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1450" t="2731" r="1766" b="2837">row</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1829" t="2731" r="1997" b="2837">as</wd>

<space/>

<wd l="2064" t="2683" r="2640" b="2875">OOV),</wd>

<space/>

<wd l="2702" t="2678" r="3110" b="2837">their</wd>

<space/>

<wd l="3163" t="2678" r="3490" b="2837">edit</wd>

<space/>

<wd l="3547" t="2678" r="4253" b="2837">distance</wd>

<space/>

<wd l="4310" t="2678" r="4853" b="2837">would</wd>

<space/>

<wd l="4910" t="2678" r="5587" b="2837">become</wd>

<space/>

<wd l="5640" t="2683" r="5794" b="2837">4.</wd>

<space/>

</run>

</ln>

<ln l="1440" t="2947" r="5803" b="3106" baseLine="3101" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="2952" r="1598" b="3101">A</wd>

<space/>

<wd l="1670" t="2947" r="2270" b="3106">further</wd>

<space/>

<wd l="2342" t="2947" r="3216" b="3106">relaxation</wd>

<space/>

<wd l="3288" t="2971" r="3456" b="3106">to</wd>

<space/>

<wd l="3533" t="2947" r="3797" b="3106">the</wd>

<space/>

<wd l="3883" t="2947" r="4613" b="3106">standard</wd>

<space/>

<wd l="4694" t="2947" r="5016" b="3106">edit</wd>

<space/>

<wd l="5098" t="2947" r="5803" b="3106">distance</wd>

<space/>

</ln>

<ln l="1440" t="3221" r="5818" b="3422" baseLine="3370" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="3221" r="1579" b="3379">is</wd>

<space/>

<wd l="1656" t="3221" r="1982" b="3379">that</wd>

<space/>

<wd l="2054" t="3274" r="2304" b="3379">we</wd>

<space/>

<wd l="2381" t="3221" r="2923" b="3422">assign</wd>

<space/>

<wd l="2995" t="3226" r="3096" b="3379">0</wd>

<space/>

<wd l="3178" t="3245" r="3523" b="3379">cost</wd>

<space/>

<wd l="3590" t="3245" r="3758" b="3379">to</wd>

<space/>

<wd l="3830" t="3221" r="4099" b="3379">the</wd>

<space/>

<wd l="4166" t="3221" r="5006" b="3422">following</wd>

<space/>

<wd l="5083" t="3221" r="5544" b="3379">kinds</wd>

<space/>

<wd l="5630" t="3221" r="5818" b="3379">of</wd>

<space/>

</ln>

<ln l="1440" t="3490" r="4718" b="3691" baseLine="3638" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="3490" r="2515" b="3691">phonetically</wd>

<space/>

<wd l="2573" t="3490" r="3437" b="3648">motivated</wd>

<space/>

<wd l="3490" t="3490" r="4718" b="3691">transcriptions:</wd>

</ln>

</para>

<para l="1675" t="3917" r="5808" b="4392" alignment="justified" li="432" spaceBefore="179" fli="-216" lsp="exactly" lspExact="271" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="1675" t="3917" r="5808" b="4128" baseLine="4075">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1675" t="3979" r="1882" b="4075">•</wd>

<tab position="1729"/>

<wd l="1882" t="3979" r="1982" b="4085">z</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><wd l="2107" t="3984" r="2304" b="4066">→</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2434" t="3979" r="2520" b="4085">s</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2606" t="3926" r="3235" b="4085">located</wd>

<space/>

<wd l="3322" t="3950" r="3475" b="4085">at</wd>

<space/>

<wd l="3552" t="3926" r="3816" b="4085">the</wd>

<space/>

<wd l="3902" t="3926" r="4214" b="4085">end</wd>

<space/>

<wd l="4301" t="3926" r="4488" b="4085">of</wd>

<space/>

<wd l="4555" t="3926" r="5078" b="4085">words</wd>

<space/>

<wd l="5179" t="3931" r="5544" b="4128">(e.g.</wd>

<space/>

<wd l="5635" t="3926" r="5808" b="4080">in</wd>

<space/>

</run>

</ln>

<ln l="1882" t="4186" r="3120" b="4392" baseLine="4344">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1882" t="4210" r="2270" b="4354">catz</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><wd l="2347" t="4253" r="2544" b="4334">→</wd>

<space/>

</run>

<wd l="2621" t="4200" r="3120" b="4392"><run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">cats</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">),</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

<para l="1675" t="4637" r="5808" b="5112" alignment="justified" li="432" spaceBefore="173" fli="-216" lsp="exactly" lspExact="271" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="1675" t="4637" r="5808" b="4843" baseLine="4790">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1675" t="4694" r="1882" b="4790">•</wd>

<tab position="1729"/>

<wd l="1882" t="4694" r="1987" b="4800">a</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><wd l="2093" t="4699" r="2290" b="4781">→</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2400" t="4694" r="2587" b="4800">er</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2669" t="4642" r="3302" b="4800">located</wd>

<space/>

<wd l="3374" t="4666" r="3528" b="4800">at</wd>

<space/>

<wd l="3600" t="4642" r="3864" b="4800">the</wd>

<space/>

<wd l="3941" t="4642" r="4253" b="4800">end</wd>

<space/>

<wd l="4325" t="4642" r="4517" b="4800">of</wd>

<space/>

<wd l="4574" t="4642" r="5098" b="4800">words</wd>

<space/>

<wd l="5189" t="4646" r="5554" b="4843">(e.g.</wd>

<space/>

<wd l="5635" t="4642" r="5808" b="4795">in</wd>

<space/>

</run>

</ln>

<ln l="1886" t="4901" r="3437" b="5112" baseLine="5059">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1886" t="4910" r="2381" b="5112">bigga</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><wd l="2453" t="4968" r="2650" b="5050">→</wd>

<space/>

</run>

<wd l="2731" t="4910" r="3437" b="5112"><run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">bigger</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">).</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

<para l="1440" t="5342" r="5808" b="8525" alignment="justified" spaceBefore="151" fli="216" lsp="exactly" lspExact="271" language="en">

<ln l="1656" t="5342" r="5794" b="5544" baseLine="5496" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1656" t="5347" r="1910" b="5544">By</wd>

<space/>

<wd l="2016" t="5342" r="2664" b="5544">making</wd>

<space/>

<wd l="2770" t="5342" r="3038" b="5501">the</wd>

<space/>

<wd l="3144" t="5342" r="3653" b="5501">above</wd>

<space/>

<wd l="3754" t="5342" r="4704" b="5501">relaxations</wd>

<space/>

<wd l="4814" t="5366" r="4982" b="5501">to</wd>

<space/>

<wd l="5088" t="5342" r="5352" b="5501">the</wd>

<space/>

<wd l="5458" t="5342" r="5794" b="5501">def-</wd>

</ln>

<ln l="1440" t="5616" r="5808" b="5803" baseLine="5765" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="5616" r="2011" b="5774">inition</wd>

<space/>

<wd l="2122" t="5616" r="2314" b="5774">of</wd>

<space/>

<wd l="2410" t="5616" r="2674" b="5774">the</wd>

<space/>

<wd l="2794" t="5616" r="3523" b="5774">standard</wd>

<space/>

<wd l="3634" t="5616" r="3960" b="5774">edit</wd>

<space/>

<wd l="4070" t="5616" r="4824" b="5803">distance,</wd>

<space/>

<wd l="4958" t="5669" r="5213" b="5774">we</wd>

<space/>

<wd l="5323" t="5616" r="5808" b="5774">could</wd>

<space/>

</ln>

<ln l="1445" t="5885" r="5794" b="6086" baseLine="6038" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="5885" r="1987" b="6043">obtain</wd>

<space/>

<wd l="2069" t="5885" r="2578" b="6086">larger</wd>

<space/>

<wd l="2664" t="5885" r="3494" b="6043">candidate</wd>

<space/>

<wd l="3586" t="5909" r="3898" b="6043">sets</wd>

<space/>

<wd l="3989" t="5885" r="4248" b="6043">for</wd>

<space/>

<wd l="4330" t="5938" r="4426" b="6043">a</wd>

<space/>

<wd l="4512" t="5885" r="4982" b="6086">given</wd>

<space/>

<wd l="5074" t="5885" r="5395" b="6043">edit</wd>

<space/>

<wd l="5482" t="5885" r="5794" b="6043">dis-</wd>

</ln>

<ln l="1440" t="6158" r="5803" b="6360" baseLine="6307" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="6182" r="1901" b="6317">tance</wd>

<space/>

<wd l="1944" t="6158" r="2755" b="6317">threshold</wd>

<space/>

<wd l="2798" t="6158" r="3058" b="6317">for</wd>

<space/>

<wd l="3096" t="6158" r="3658" b="6317">tokens</wd>

<space/>

<wd l="3710" t="6158" r="4099" b="6317">with</wd>

<space/>

<wd l="4138" t="6158" r="4699" b="6360">higher</wd>

<space/>

<wd l="4742" t="6158" r="5270" b="6346">recall,</wd>

<space/>

<wd l="5333" t="6211" r="5501" b="6317">as</wd>

<space/>

<wd l="5549" t="6211" r="5803" b="6317">we</wd>

<space/>

</ln>

<ln l="1445" t="6427" r="5803" b="6586" baseLine="6581" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="6427" r="1925" b="6586">could</wd>

<space/>

<wd l="2002" t="6427" r="2582" b="6586">reduce</wd>

<space/>

<wd l="2659" t="6427" r="2923" b="6586">the</wd>

<space/>

<wd l="3005" t="6427" r="3331" b="6586">edit</wd>

<space/>

<wd l="3413" t="6427" r="4118" b="6586">distance</wd>

<space/>

<wd l="4195" t="6427" r="4925" b="6586">between</wd>

<space/>

<wd l="5002" t="6427" r="5266" b="6586">the</wd>

<space/>

<wd l="5347" t="6432" r="5803" b="6586">OOV</wd>

<space/>

</ln>

<ln l="1440" t="6701" r="5803" b="6902" baseLine="6850" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="6701" r="1963" b="6859">words</wd>

<space/>

<wd l="2026" t="6701" r="2338" b="6859">and</wd>

<space/>

<wd l="2386" t="6701" r="2789" b="6859">their</wd>

<space/>

<wd l="2842" t="6701" r="3826" b="6902">appropriate</wd>

<space/>

<wd l="3878" t="6706" r="4104" b="6859">IV</wd>

<space/>

<wd l="4162" t="6701" r="5059" b="6902">equivalent</wd>

<space/>

<wd l="5102" t="6701" r="5275" b="6854">in</wd>

<space/>

<wd l="5323" t="6754" r="5803" b="6902">many</wd>

<space/>

</ln>

<ln l="1445" t="6970" r="5808" b="7171" baseLine="7123" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="7022" r="1944" b="7128">cases.</wd>

<space/>

<wd l="2117" t="6970" r="3048" b="7171">Obviously,</wd>

<space/>

<wd l="3154" t="7022" r="3322" b="7128">as</wd>

<space/>

<wd l="3413" t="6970" r="3682" b="7128">the</wd>

<space/>

<wd l="3768" t="6970" r="4598" b="7128">candidate</wd>

<space/>

<wd l="4694" t="6994" r="4930" b="7128">set</wd>

<space/>

<wd l="5016" t="7022" r="5582" b="7171">grows,</wd>

<space/>

<wd l="5683" t="6970" r="5808" b="7128">it</wd>

<space/>

</ln>

<ln l="1440" t="7243" r="5794" b="7445" baseLine="7392" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="7243" r="1949" b="7445">might</wd>

<space/>

<wd l="2016" t="7267" r="2280" b="7445">get</wd>

<space/>

<wd l="2342" t="7243" r="3408" b="7445">increasingly</wd>

<space/>

<wd l="3480" t="7243" r="4162" b="7402">difficult</wd>

<space/>

<wd l="4224" t="7267" r="4392" b="7402">to</wd>

<space/>

<wd l="4464" t="7243" r="5064" b="7402">choose</wd>

<space/>

<wd l="5126" t="7243" r="5395" b="7402">the</wd>

<space/>

<wd l="5462" t="7296" r="5794" b="7402">cor-</wd>

</ln>

<ln l="1440" t="7512" r="5803" b="7714" baseLine="7661" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="7536" r="1771" b="7670">rect</wd>

<space/>

<wd l="1829" t="7512" r="3043" b="7670">normalization</wd>

<space/>

<wd l="3106" t="7512" r="3533" b="7670">from</wd>

<space/>

<wd l="3590" t="7512" r="3758" b="7670">it.</wd>

<space/>

<wd l="3864" t="7517" r="4680" b="7699">However,</wd>

<space/>

<wd l="4762" t="7536" r="4915" b="7670">at</wd>

<space/>

<wd l="4978" t="7512" r="5285" b="7670">this</wd>

<space/>

<wd l="5366" t="7536" r="5803" b="7714">stage</wd>

<space/>

</ln>

<ln l="1445" t="7786" r="5803" b="7987" baseLine="7934" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="7786" r="1637" b="7944">of</wd>

<space/>

<wd l="1675" t="7838" r="1968" b="7944">our</wd>

<space/>

<wd l="2011" t="7786" r="2760" b="7987">pipeline,</wd>

<space/>

<wd l="2822" t="7838" r="3077" b="7944">we</wd>

<space/>

<wd l="3125" t="7838" r="3547" b="7944">were</wd>

<space/>

<wd l="3600" t="7838" r="4046" b="7944">more</wd>

<space/>

<wd l="4099" t="7786" r="4949" b="7944">interested</wd>

<space/>

<wd l="4997" t="7786" r="5170" b="7939">in</wd>

<space/>

<wd l="5218" t="7786" r="5803" b="7987">having</wd>

<space/>

</ln>

<ln l="1440" t="8054" r="5794" b="8213" baseLine="8203" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="8054" r="1704" b="8213">the</wd>

<space/>

<wd l="1762" t="8078" r="2362" b="8213">correct</wd>

<space/>

<wd l="2410" t="8059" r="2635" b="8213">IV</wd>

<space/>

<wd l="2688" t="8054" r="3134" b="8213">word</wd>

<space/>

<wd l="3178" t="8054" r="3350" b="8208">in</wd>

<space/>

<wd l="3398" t="8054" r="3662" b="8213">the</wd>

<space/>

<wd l="3720" t="8078" r="3955" b="8213">set</wd>

<space/>

<wd l="4008" t="8054" r="4195" b="8213">of</wd>

<space/>

<wd l="4234" t="8054" r="5064" b="8213">candidate</wd>

<space/>

<wd l="5117" t="8054" r="5794" b="8213">normal-</wd>

</ln>

<ln l="1440" t="8323" r="4598" b="8525" baseLine="8477" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="8323" r="2078" b="8510">ization,</wd>

<space/>

<wd l="2141" t="8323" r="2654" b="8482">rather</wd>

<space/>

<wd l="2707" t="8323" r="3077" b="8482">than</wd>

<space/>

<wd l="3134" t="8323" r="3898" b="8525">reducing</wd>

<space/>

<wd l="3955" t="8323" r="4152" b="8482">its</wd>

<space/>

<wd l="4224" t="8323" r="4598" b="8482">size.</wd>

</ln>

</para>

<para l="1440" t="8770" r="5664" b="8971" alignment="left" spaceBefore="192" lsp="exactly" lspExact="254" language="en">

<ln l="1440" t="8770" r="5664" b="8971" baseLine="8918" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="2">

<wd l="1440" t="8770" r="1709" b="8928">4.3</wd>

<space/>

<wd l="1930" t="8770" r="2654" b="8971">Making</wd>

<space/>

<wd l="2712" t="8774" r="3053" b="8928">Use</wd>

<space/>

<wd l="3110" t="8770" r="3302" b="8928">of</wd>

<space/>

<wd l="3346" t="8770" r="4027" b="8928">Twitter</wd>

<space/>

<wd l="4085" t="8818" r="4776" b="8971">n-gram</wd>

<space/>

<wd l="4834" t="8770" r="5664" b="8928">Statistics</wd>

</ln>

</para>

<para l="1440" t="9120" r="5818" b="11491" alignment="justified" spaceBefore="76" lsp="exactly" lspExact="270" language="en">

<ln l="1445" t="9120" r="5808" b="9322" baseLine="9269" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="9125" r="1786" b="9278">Our</wd>

<space/>

<wd l="1862" t="9120" r="2304" b="9278">basic</wd>

<space/>

<wd l="2400" t="9120" r="3394" b="9322">assumption</wd>

<space/>

<wd l="3475" t="9173" r="3802" b="9278">was</wd>

<space/>

<wd l="3898" t="9120" r="4224" b="9278">that</wd>

<space/>

<wd l="4306" t="9120" r="4733" b="9278">from</wd>

<space/>

<wd l="4814" t="9120" r="5083" b="9278">the</wd>

<space/>

<wd l="5170" t="9144" r="5808" b="9278">context</wd>

<space/>

</ln>

<ln l="1445" t="9389" r="5798" b="9590" baseLine="9542" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="9389" r="1637" b="9547">of</wd>

<space/>

<wd l="1704" t="9442" r="1901" b="9547">an</wd>

<space/>

<wd l="1982" t="9394" r="2438" b="9547">OOV</wd>

<space/>

<wd l="2515" t="9389" r="3005" b="9576">word,</wd>

<space/>

<wd l="3101" t="9389" r="3221" b="9547">it</wd>

<space/>

<wd l="3298" t="9389" r="3432" b="9547">is</wd>

<space/>

<wd l="3518" t="9389" r="4234" b="9590">possible</wd>

<space/>

<wd l="4310" t="9413" r="4474" b="9547">to</wd>

<space/>

<wd l="4555" t="9413" r="5525" b="9547">reconstruct</wd>

<space/>

<wd l="5602" t="9389" r="5798" b="9547">its</wd>

<space/>

</ln>

<ln l="1440" t="9662" r="5794" b="9864" baseLine="9811" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="9667" r="1666" b="9821">IV</wd>

<space/>

<wd l="1738" t="9662" r="2674" b="9864">equivalent,</wd>

<space/>

<wd l="2755" t="9715" r="2923" b="9821">as</wd>

<space/>

<wd l="2995" t="9662" r="3432" b="9821">there</wd>

<space/>

<wd l="3499" t="9715" r="3758" b="9821">are</wd>

<space/>

<wd l="3826" t="9715" r="4267" b="9821">users</wd>

<space/>

<wd l="4334" t="9662" r="4709" b="9821">who</wd>

<space/>

<wd l="4776" t="9715" r="5064" b="9821">use</wd>

<space/>

<wd l="5131" t="9662" r="5395" b="9821">the</wd>

<space/>

<wd l="5462" t="9715" r="5794" b="9821">cor-</wd>

</ln>

<ln l="1440" t="9931" r="5803" b="10133" baseLine="10085" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="9955" r="1771" b="10090">rect</wd>

<space/>

<wd l="1829" t="9936" r="2054" b="10090">IV</wd>

<space/>

<wd l="2117" t="9931" r="2784" b="10133">English</wd>

<space/>

<wd l="2842" t="9931" r="3269" b="10090">form</wd>

<space/>

<wd l="3331" t="9931" r="3523" b="10090">of</wd>

<space/>

<wd l="3566" t="9931" r="3835" b="10090">the</wd>

<space/>

<wd l="3898" t="9936" r="4354" b="10090">OOV</wd>

<space/>

<wd l="4416" t="9931" r="4862" b="10090">word</wd>

<space/>

<wd l="4920" t="9931" r="5482" b="10090">within</wd>

<space/>

<wd l="5539" t="9931" r="5803" b="10090">the</wd>

<space/>

</ln>

<ln l="1450" t="10205" r="5818" b="10406" baseLine="10354">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="1450" t="10258" r="1886" b="10363">same</wd>

<space/>

<wd l="1987" t="10229" r="2664" b="10392">context,</wd>

<space/>

<wd l="2784" t="10258" r="3082" b="10406">e.g.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="3187" t="10258" r="3461" b="10363">see</wd>

<space/>

<wd l="3547" t="10258" r="3869" b="10406">you</wd>

<space/>

<wd l="3974" t="10238" r="4819" b="10363">tomorrow</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="4910" t="10205" r="5530" b="10363">instead</wd>

<space/>

<wd l="5630" t="10205" r="5818" b="10363">of</wd>

<space/>

</run>

</ln>

<ln l="1440" t="10474" r="5803" b="10675" baseLine="10627">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="1440" t="10526" r="1714" b="10632">see</wd>

<space/>

<wd l="1805" t="10526" r="1906" b="10632">u</wd>

<space/>

</run>

<wd l="1992" t="10507" r="2477" b="10632"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">tmrw</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><space/>

<wd l="2630" t="10474" r="2971" b="10632">The</wd>

<space/>

<wd l="3053" t="10474" r="3682" b="10632">Twitter</wd>

<space/>

<wd l="3758" t="10526" r="4387" b="10675">n-gram</wd>

<space/>

<wd l="4464" t="10474" r="5462" b="10675">frequencies</wd>

<space/>

<wd l="5549" t="10526" r="5803" b="10632">we</wd>

<space/>

</run>

</ln>

<ln l="1440" t="10747" r="5808" b="10949" baseLine="10896" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="10747" r="1910" b="10906">made</wd>

<space/>

<wd l="1958" t="10800" r="2246" b="10906">use</wd>

<space/>

<wd l="2299" t="10747" r="2491" b="10906">of</wd>

<space/>

<wd l="2525" t="10800" r="2947" b="10906">were</wd>

<space/>

<wd l="2995" t="10747" r="3259" b="10906">the</wd>

<space/>

<wd l="3312" t="10800" r="3696" b="10906">ones</wd>

<space/>

<wd l="3754" t="10747" r="4080" b="10906">that</wd>

<space/>

<wd l="4123" t="10800" r="4378" b="10906">we</wd>

<space/>

<wd l="4430" t="10747" r="5381" b="10949">aggregated</wd>

<space/>

<wd l="5429" t="10800" r="5808" b="10906">over</wd>

<space/>

</ln>

<ln l="1440" t="11016" r="5794" b="11218" baseLine="11165" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="11016" r="1704" b="11174">the</wd>

<space/>

<wd l="1766" t="11016" r="2395" b="11174">Twitter</wd>

<space/>

<wd l="2453" t="11069" r="3086" b="11218">n-gram</wd>

<space/>

<wd l="3144" t="11069" r="3715" b="11218">corpus</wd>

<space/>

<wd l="3787" t="11016" r="4742" b="11218">augmented</wd>

<space/>

<wd l="4800" t="11016" r="5189" b="11174">with</wd>

<space/>

<wd l="5251" t="11016" r="5794" b="11174">demo-</wd>

</ln>

<ln l="1445" t="11290" r="5794" b="11491" baseLine="11438" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="11290" r="2088" b="11491">graphic</wd>

<space/>

<wd l="2136" t="11290" r="2928" b="11448">metadata</wd>

<space/>

<wd l="2976" t="11290" r="3806" b="11448">described</wd>

<space/>

<wd l="3850" t="11290" r="4022" b="11443">in</wd>

<space/>

<wd l="4070" t="11290" r="5184" b="11486">(Herdağdelen,</wd>

<space/>

<wd l="5246" t="11294" r="5794" b="11486">2013).</wd>

</ln>

</para>

<para l="1440" t="11558" r="5803" b="12259" alignment="justified" fli="216" lsp="exactly" lspExact="271" language="en">

<ln l="1656" t="11558" r="5803" b="11760" baseLine="11708">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1656" t="11563" r="1963" b="11717">For</wd>

<space/>

<wd l="2021" t="11611" r="2117" b="11717">a</wd>

<space/>

<wd l="2179" t="11558" r="2654" b="11760">given</wd>

<space/>

<wd l="2712" t="11558" r="3197" b="11717">token</wd>

<space/>

</run>

<wd l="3259" t="11573" r="3384" b="11750"><run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">i</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="3466" t="11582" r="3619" b="11717">at</wd>

<space/>

<wd l="3677" t="11558" r="4382" b="11760">position</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="4445" t="11563" r="4507" b="11717">i</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="4574" t="11558" r="4747" b="11712">in</wd>

<space/>

<wd l="4810" t="11611" r="4906" b="11717">a</wd>

<space/>

<wd l="4963" t="11582" r="5477" b="11746">tweet,</wd>

<space/>

<wd l="5549" t="11611" r="5803" b="11717">we</wd>

<space/>

</run>

</ln>

<ln l="1445" t="11827" r="5803" b="12029" baseLine="11981" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="11827" r="1934" b="11986">chose</wd>

<space/>

<wd l="1992" t="11827" r="2256" b="11986">the</wd>

<space/>

<wd l="2309" t="11851" r="2736" b="11986">most</wd>

<space/>

<wd l="2789" t="11827" r="3547" b="12029">probable</wd>

<space/>

<wd l="3610" t="11827" r="4416" b="11986">corrected</wd>

<space/>

<wd l="4469" t="11827" r="4896" b="11986">form</wd>

<space/>

<wd l="4949" t="11827" r="5803" b="12029">according</wd>

<space/>

</ln>

<ln l="1440" t="12101" r="2678" b="12259" baseLine="12250" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="12125" r="1608" b="12259">to</wd>

<space/>

<wd l="1666" t="12101" r="1930" b="12259">the</wd>

<space/>

<wd l="1987" t="12101" r="2678" b="12259">formula</wd>

</ln>

</para>

<para l="2304" t="12542" r="5794" b="12782" alignment="left" li="864" spaceBefore="215" lsp="exactly" lspExact="259" language="en">

<tabs position="2304"/>

<ln l="2304" t="12542" r="5794" b="12782" baseLine="12717">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2304" t="12624" r="2606" b="12773">arg</wd>

<space/>

<wd l="2650" t="12624" r="3053" b="12730">max</wd>

<space/>

</run>

<wd l="3245" t="12542" r="5083" b="12782"><run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">P</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">t</run>

<run italic="true" underlined="none" subsuperscript="superscript" fontSize="1150" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">&apos;</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">|</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">i</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">−</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">)</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">P</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">i</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">+1</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">|</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">t</run>

<run italic="true" underlined="none" subsuperscript="superscript" fontSize="1150" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">&apos;</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">)</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><tab position="5083"/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="5558" t="12576" r="5794" b="12768">(1)</wd>

</run>

</ln>

</para>

<para l="2150" t="12802" r="3192" b="12965" alignment="left" li="720" lsp="exactly" lspExact="184" language="en">

<ln l="2150" t="12802" r="3192" b="12965" baseLine="12917">

<wd l="2150" t="12802" r="3192" b="12965"><run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="6">t</run>

<run underlined="none" subsuperscript="none" fontSize="450" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="6">&apos;</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="6">∈</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="6">C</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="6">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="6">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="6">i</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="6">,ct</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="6">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="6">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="6">i</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="6">))</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="6"><nl orig="true"/>

</run>

</ln>

</para>

<para l="1440" t="13190" r="5818" b="15312" alignment="justified" spaceBefore="202" spaceAfter="44" lsp="exactly" lspExact="271" language="en">

<ln l="1440" t="13190" r="5818" b="13421" baseLine="13359">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1440" t="13210" r="1973" b="13368">where</wd>

<space/>

<wd l="2064" t="13210" r="2328" b="13368">the</wd>

<space/>

<wd l="2424" t="13210" r="3149" b="13368">function</wd>

<space/>

</run>

<wd l="3250" t="13195" r="3691" b="13421"><run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">C</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">i</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

<wd l="3744" t="13195" r="4296" b="13421"><run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">ct</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">i</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">))</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="4406" t="13234" r="5002" b="13368">returns</wd>

<space/>

<wd l="5107" t="13262" r="5203" b="13368">a</wd>

<space/>

<wd l="5299" t="13234" r="5534" b="13368">set</wd>

<space/>

<wd l="5630" t="13210" r="5818" b="13368">of</wd>

<space/>

</run>

</ln>

<ln l="1440" t="13464" r="5794" b="13690" baseLine="13633">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1440" t="13483" r="1666" b="13637">IV</wd>

<space/>

<wd l="1728" t="13478" r="2640" b="13637">candidates</wd>

<space/>

<wd l="2702" t="13478" r="2957" b="13637">for</wd>

<space/>

<wd l="3010" t="13478" r="3274" b="13637">the</wd>

<space/>

<wd l="3331" t="13478" r="3816" b="13637">token</wd>

<space/>

</run>

<wd l="3874" t="13493" r="4056" b="13670"><run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">i</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="4128" t="13478" r="4978" b="13680">according</wd>

<space/>

<wd l="5035" t="13502" r="5203" b="13637">to</wd>

<space/>

</run>

<wd l="5266" t="13464" r="5794" b="13690"><run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">ct</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">i</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">)</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

</ln>

<ln l="1440" t="13752" r="5808" b="13954" baseLine="13901" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="13752" r="1973" b="13910">which</wd>

<space/>

<wd l="2059" t="13752" r="2198" b="13910">is</wd>

<space/>

<wd l="2290" t="13752" r="2554" b="13910">the</wd>

<space/>

<wd l="2645" t="13752" r="3528" b="13910">correction</wd>

<space/>

<wd l="3610" t="13776" r="3984" b="13954">type</wd>

<space/>

<wd l="4075" t="13752" r="5054" b="13910">determined</wd>

<space/>

<wd l="5141" t="13752" r="5395" b="13910">for</wd>

<space/>

<wd l="5477" t="13752" r="5808" b="13910">that</wd>

<space/>

</ln>

<ln l="1440" t="14021" r="5794" b="14222" baseLine="14174" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="14021" r="1925" b="14179">token</wd>

<space/>

<wd l="2002" t="14021" r="2218" b="14222">by</wd>

<space/>

<wd l="2299" t="14021" r="2568" b="14179">the</wd>

<space/>

<wd l="2659" t="14074" r="3446" b="14222">sequence</wd>

<space/>

<wd l="3528" t="14021" r="4070" b="14179">model</wd>

<space/>

<wd l="4152" t="14021" r="5088" b="14179">introduced</wd>

<space/>

<wd l="5170" t="14021" r="5338" b="14174">in</wd>

<space/>

<wd l="5424" t="14026" r="5794" b="14179">Sec-</wd>

</ln>

<ln l="1440" t="14294" r="5798" b="14496" baseLine="14443" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="14294" r="1781" b="14453">tion</wd>

<space/>

<wd l="1858" t="14299" r="2174" b="14453">4.1.</wd>

<space/>

<wd l="2333" t="14299" r="2616" b="14453">We</wd>

<space/>

<wd l="2698" t="14294" r="3384" b="14453">indexed</wd>

<space/>

<wd l="3461" t="14294" r="3730" b="14453">the</wd>

<space/>

<wd l="3806" t="14294" r="4435" b="14453">Twitter</wd>

<space/>

<wd l="4512" t="14347" r="5146" b="14496">n-gram</wd>

<space/>

<wd l="5227" t="14347" r="5798" b="14496">corpus</wd>

<space/>

</ln>

<ln l="1440" t="14563" r="5808" b="14765" baseLine="14717" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="14563" r="1829" b="14722">with</wd>

<space/>

<wd l="1886" t="14563" r="2155" b="14722">the</wd>

<space/>

<wd l="2213" t="14563" r="2770" b="14765">highly</wd>

<space/>

<wd l="2837" t="14563" r="3576" b="14722">effective</wd>

<space/>

<wd l="3638" t="14568" r="3974" b="14717">LIT</wd>

<space/>

<wd l="4037" t="14563" r="4690" b="14722">indexer</wd>

<space/>

<wd l="4752" t="14563" r="5429" b="14765">(Ceylan</wd>

<space/>

<wd l="5496" t="14563" r="5808" b="14722">and</wd>

<space/>

</ln>

<ln l="1440" t="14837" r="5803" b="15038" baseLine="14986" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="14837" r="2294" b="15024">Mihalcea,</wd>

<space/>

<wd l="2390" t="14842" r="2938" b="15034">2011),</wd>

<space/>

<wd l="3034" t="14837" r="3566" b="14995">which</wd>

<space/>

<wd l="3638" t="14837" r="4114" b="14995">made</wd>

<space/>

<wd l="4186" t="14837" r="4502" b="14995">fast</wd>

<space/>

<wd l="4579" t="14837" r="5198" b="15038">queries</wd>

<space/>

<wd l="5285" t="14837" r="5477" b="14995">of</wd>

<space/>

<wd l="5539" t="14837" r="5803" b="14995">the</wd>

<space/>

</ln>

<ln l="1440" t="15106" r="5808" b="15312" baseLine="15265">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1440" t="15106" r="1867" b="15264">form</wd>

<space/>

</run>

<wd l="1939" t="15134" r="2338" b="15293"><run underlined="none" subsuperscript="none" fontSize="1350" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">t</run>

<run underlined="none" subsuperscript="none" fontSize="750" fontFace="Times New Roman" fontPitch="fixed" spacing="-2">i</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">−</run>

<run underlined="none" subsuperscript="none" fontSize="750" fontFace="Times New Roman" fontPitch="fixed" spacing="-2">1</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="750" fontFace="Times New Roman" fontPitch="fixed" spacing="-2"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2"><wd l="2419" t="15154" r="2506" b="15254">*</wd>

<space/>

</run>

<wd l="2573" t="15134" r="2966" b="15312"><run underlined="none" subsuperscript="none" fontSize="1350" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">t</run>

<run underlined="none" subsuperscript="none" fontSize="750" fontFace="Times New Roman" fontPitch="fixed" spacing="-2">i</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">+</run>

<run underlined="none" subsuperscript="none" fontSize="750" fontFace="Times New Roman" fontPitch="fixed" spacing="-2">1</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="750" fontFace="Times New Roman" fontPitch="fixed" spacing="-2"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="3062" t="15106" r="3821" b="15307">possible,</wd>

<space/>

<wd l="3902" t="15106" r="4171" b="15264">the</wd>

<space/>

<wd l="4248" t="15106" r="4882" b="15307">symbol</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontPitch="fixed" spacing="-2"><wd l="4978" t="15163" r="5064" b="15245">*</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="5155" t="15106" r="5635" b="15307">being</wd>

<space/>

<wd l="5712" t="15158" r="5808" b="15264">a</wd>

</run>

</ln>

</para>

</column>

<column l="6142" t="1225" r="10524" b="15386">

<table l="6142" t="1225" r="10524" b="2628" alignment="left" spaceAfter="144">

<gridTable>

<gridCol>1557</gridCol>

<gridCol>1051</gridCol>

<gridCol>836</gridCol>

<gridCol>938</gridCol>

<gridRow>306</gridRow>

<gridRow>250</gridRow>

<gridRow>274</gridRow>

<gridRow>273</gridRow>

<gridRow>300</gridRow>

</gridTable>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="6269" t="1291" r="7200" b="1450" alignment="left" li="127" spaceAfter="24" lsp="exactly" lspExact="271" language="en">

<ln l="6269" t="1291" r="7200" b="1450" baseLine="1445" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6269" t="1291" r="7200" b="1450">Correction</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="7819" t="1291" r="8630" b="1450" alignment="centered" spaceAfter="24" lsp="exactly" lspExact="271" language="en">

<ln l="7819" t="1291" r="8630" b="1450" baseLine="1445" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="7819" t="1291" r="8630" b="1450">Precision</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<bottomBorder type="single" width="10"/>

<para l="8890" t="1291" r="9446" b="1450" alignment="centered" spaceAfter="24" lsp="exactly" lspExact="271" language="en">

<ln l="8890" t="1291" r="9446" b="1450" baseLine="1445" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="8890" t="1291" r="9446" b="1450">Recall</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<bottomBorder type="single" width="10"/>

<para l="9710" t="1296" r="10363" b="1450" alignment="centered" spaceAfter="24" lsp="exactly" lspExact="271" language="en">

<ln l="9710" t="1296" r="10363" b="1450" baseLine="1445" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="9710" t="1296" r="10363" b="1450">F-score</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<para l="6269" t="1565" r="7574" b="1771" alignment="left" li="127" lsp="exactly" lspExact="240" language="en">

<ln l="6269" t="1565" r="7574" b="1771" baseLine="1723" italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="9">

<wd l="6269" t="1565" r="7574" b="1771">MissingApos</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<para l="7930" t="1574" r="8520" b="1728" alignment="left" lsp="exactly" lspExact="240" language="en">

<tabs position="7930"/>

<ln l="7930" t="1574" r="8520" b="1728" baseLine="1723" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="7930" t="1574" r="8520" b="1728">0.9972</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<para l="8875" t="1574" r="9466" b="1728" alignment="left" lsp="exactly" lspExact="240" language="en">

<tabs position="8875"/>

<ln l="8875" t="1574" r="9466" b="1728" baseLine="1723" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="8875" t="1574" r="9466" b="1728">0.9972</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<para l="9739" t="1574" r="10334" b="1728" alignment="left" lsp="exactly" lspExact="240" language="en">

<tabs position="9739"/>

<ln l="9739" t="1574" r="10334" b="1728" baseLine="1723" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="9739" t="1574" r="10334" b="1728">0.9972</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<rightBorder type="single" width="10"/>

<para l="6269" t="1838" r="7478" b="2045" alignment="left" li="127" lsp="exactly" lspExact="256" language="en">

<ln l="6269" t="1838" r="7478" b="2045" baseLine="1992" italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="17">

<wd l="6269" t="1838" r="7478" b="2045">MissingW5</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<para l="7930" t="1843" r="8520" b="2002" alignment="left" lsp="exactly" lspExact="263" language="en">

<tabs position="7930"/>

<ln l="7930" t="1843" r="8520" b="2002" baseLine="1992" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="7930" t="1843" r="8520" b="2002">0.8684</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="middle">

<para l="8875" t="1848" r="9461" b="2002" alignment="left" lsp="exactly" lspExact="263" language="en">

<tabs position="8875"/>

<ln l="8875" t="1848" r="9461" b="2002" baseLine="1992" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="8875" t="1848" r="9461" b="2002">0.4177</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="middle">

<para l="9739" t="1843" r="10315" b="2002" alignment="left" lsp="exactly" lspExact="263" language="en">

<tabs position="9739"/>

<ln l="9739" t="1843" r="10315" b="2002" baseLine="1992" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="9739" t="1843" r="10315" b="2002">0.5641</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<rightBorder type="single" width="10"/>

<para l="6283" t="2117" r="6653" b="2270" alignment="left" li="127" lsp="exactly" lspExact="256" language="en">

<ln l="6283" t="2117" r="6653" b="2270" baseLine="2266">

<wd l="6283" t="2117" r="6653" b="2270"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">to</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">1</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><nl orig="true"/>

</run>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<para l="7930" t="2117" r="8501" b="2270" alignment="left" lsp="exactly" lspExact="263" language="en">

<tabs position="7930"/>

<ln l="7930" t="2117" r="8501" b="2270" baseLine="2266" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="7930" t="2117" r="8501" b="2270">0.9191</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="middle">

<para l="8875" t="2117" r="9461" b="2270" alignment="left" lsp="exactly" lspExact="263" language="en">

<tabs position="8875"/>

<ln l="8875" t="2117" r="9461" b="2270" baseLine="2266" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="8875" t="2117" r="9461" b="2270">0.9219</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="middle">

<para l="9739" t="2112" r="10325" b="2270" alignment="left" lsp="exactly" lspExact="263" language="en">

<tabs position="9739"/>

<ln l="9739" t="2112" r="10325" b="2270" baseLine="2266" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="9739" t="2112" r="10325" b="2270">0.9205</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="middle">

<rightBorder type="single" width="10"/>

<para l="6283" t="2386" r="7162" b="2573" alignment="left" li="127" spaceAfter="1" lsp="exactly" lspExact="275" language="en">

<ln l="6283" t="2386" r="7162" b="2573" baseLine="2551">

<wd l="6283" t="2386" r="7162" b="2573"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="8">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="8">toM</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="8">ABB</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="8"><nl orig="true"/>

</run>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<para l="7930" t="2386" r="8501" b="2544" alignment="left" spaceAfter="29" lsp="exactly" lspExact="263" language="en">

<tabs position="7930"/>

<ln l="7930" t="2386" r="8501" b="2544" baseLine="2534" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="7930" t="2386" r="8501" b="2544">0.8861</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="middle">

<para l="8875" t="2386" r="9456" b="2544" alignment="left" spaceAfter="29" lsp="exactly" lspExact="263" language="en">

<tabs position="8875"/>

<ln l="8875" t="2386" r="9456" b="2544" baseLine="2534" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="8875" t="2386" r="9456" b="2544">0.9533</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="middle">

<para l="9739" t="2386" r="10325" b="2544" alignment="left" spaceAfter="29" lsp="exactly" lspExact="263" language="en">

<tabs position="9739"/>

<ln l="9739" t="2386" r="10325" b="2544" baseLine="2534" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="9739" t="2386" r="10325" b="2544">0.9185</wd>

</ln>

</para>

</cell>

</table>

<para l="6144" t="2832" r="10512" b="3307" alignment="justified" spaceAfter="361" lsp="exactly" lspExact="269" language="en">

<ln l="6144" t="2832" r="10512" b="3034" baseLine="2986" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="2832" r="6624" b="2990">Table</wd>

<space/>

<wd l="6725" t="2837" r="6874" b="2990">4:</wd>

<space/>

<wd l="7046" t="2832" r="7786" b="2990">Detailed</wd>

<space/>

<wd l="7886" t="2832" r="8986" b="3034">performance</wd>

<space/>

<wd l="9091" t="2885" r="9302" b="2990">on</wd>

<space/>

<wd l="9398" t="2832" r="9667" b="2990">the</wd>

<space/>

<wd l="9768" t="2832" r="10512" b="2990">different</wd>

<space/>

</ln>

<ln l="6149" t="3106" r="9533" b="3307" baseLine="3254" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="3106" r="7032" b="3264">correction</wd>

<space/>

<wd l="7085" t="3130" r="7536" b="3307">types</wd>

<space/>

<wd l="7603" t="3158" r="7819" b="3264">on</wd>

<space/>

<wd l="7872" t="3106" r="8136" b="3264">the</wd>

<space/>

<wd l="8194" t="3106" r="8870" b="3307">training</wd>

<space/>

<wd l="8933" t="3106" r="9533" b="3264">dataset</wd>

</ln>

</para>

<table l="6142" t="3681" r="10524" b="5066" alignment="left" spaceAfter="144">

<gridTable>

<gridCol>1557</gridCol>

<gridCol>1051</gridCol>

<gridCol>836</gridCol>

<gridCol>938</gridCol>

<gridRow>288</gridRow>

<gridRow>245</gridRow>

<gridRow>274</gridRow>

<gridRow>273</gridRow>

<gridRow>305</gridRow>

</gridTable>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="6269" t="3730" r="7200" b="3888" alignment="left" li="127" spaceAfter="25" lsp="exactly" lspExact="254" language="en">

<ln l="6269" t="3730" r="7200" b="3888" baseLine="3878" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6269" t="3730" r="7200" b="3888">Correction</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="7819" t="3730" r="8630" b="3888" alignment="centered" spaceAfter="25" lsp="exactly" lspExact="254" language="en">

<ln l="7819" t="3730" r="8630" b="3888" baseLine="3878" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="7819" t="3730" r="8630" b="3888">Precision</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<bottomBorder type="single" width="10"/>

<para l="8890" t="3730" r="9446" b="3888" alignment="centered" spaceAfter="25" lsp="exactly" lspExact="254" language="en">

<ln l="8890" t="3730" r="9446" b="3888" baseLine="3878" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="8890" t="3730" r="9446" b="3888">Recall</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<bottomBorder type="single" width="10"/>

<para l="9710" t="3734" r="10363" b="3888" alignment="centered" spaceAfter="25" lsp="exactly" lspExact="254" language="en">

<ln l="9710" t="3734" r="10363" b="3888" baseLine="3878" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="9710" t="3734" r="10363" b="3888">F-score</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<para l="6269" t="4003" r="7574" b="4210" alignment="left" li="127" lsp="exactly" lspExact="235" language="en">

<ln l="6269" t="4003" r="7574" b="4210" baseLine="4157" italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="9">

<wd l="6269" t="4003" r="7574" b="4210">MissingApos</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<para l="7949" t="4013" r="8520" b="4166" alignment="left" lsp="exactly" lspExact="235" language="en">

<tabs position="7949"/>

<ln l="7949" t="4013" r="8520" b="4166" baseLine="4157" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="7949" t="4013" r="8520" b="4166">1.0000</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<para l="8875" t="4013" r="9446" b="4166" alignment="left" lsp="exactly" lspExact="235" language="en">

<tabs position="8875"/>

<ln l="8875" t="4013" r="9446" b="4166" baseLine="4157" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="8875" t="4013" r="9446" b="4166">0.9841</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<para l="9797" t="4013" r="10277" b="4166" alignment="left" lsp="exactly" lspExact="235" language="en">

<tabs position="9797"/>

<ln l="9797" t="4013" r="10277" b="4166" baseLine="4157" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="9797" t="4013" r="10277" b="4166">0.992</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<rightBorder type="single" width="10"/>

<para l="6269" t="4272" r="7478" b="4478" alignment="left" li="127" lsp="exactly" lspExact="252" language="en">

<ln l="6269" t="4272" r="7478" b="4478" baseLine="4430" italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="17">

<wd l="6269" t="4272" r="7478" b="4478">MissingW5</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<para l="7930" t="4282" r="8515" b="4435" alignment="left" lsp="exactly" lspExact="262" language="en">

<tabs position="7930"/>

<ln l="7930" t="4282" r="8515" b="4435" baseLine="4430" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="7930" t="4282" r="8515" b="4435">0.9737</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="middle">

<para l="8875" t="4277" r="9461" b="4435" alignment="left" lsp="exactly" lspExact="262" language="en">

<tabs position="8875"/>

<ln l="8875" t="4277" r="9461" b="4435" baseLine="4430" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="8875" t="4277" r="9461" b="4435">0.4458</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="middle">

<para l="9739" t="4277" r="10334" b="4435" alignment="left" lsp="exactly" lspExact="262" language="en">

<tabs position="9739"/>

<ln l="9739" t="4277" r="10334" b="4435" baseLine="4430" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="9739" t="4277" r="10334" b="4435">0.6116</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<rightBorder type="single" width="10"/>

<para l="6283" t="4555" r="6653" b="4709" alignment="left" li="127" lsp="exactly" lspExact="257" language="en">

<ln l="6283" t="4555" r="6653" b="4709" baseLine="4699">

<wd l="6283" t="4555" r="6653" b="4709"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">to</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">1</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><nl orig="true"/>

</run>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<para l="7930" t="4555" r="8501" b="4709" alignment="left" lsp="exactly" lspExact="264" language="en">

<tabs position="7930"/>

<ln l="7930" t="4555" r="8501" b="4709" baseLine="4699" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="7930" t="4555" r="8501" b="4709">0.9141</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="middle">

<para l="8875" t="4555" r="9461" b="4709" alignment="left" lsp="exactly" lspExact="264" language="en">

<tabs position="8875"/>

<ln l="8875" t="4555" r="9461" b="4709" baseLine="4699" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="8875" t="4555" r="9461" b="4709">0.9127</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="middle">

<para l="9739" t="4555" r="10334" b="4709" alignment="left" lsp="exactly" lspExact="264" language="en">

<tabs position="9739"/>

<ln l="9739" t="4555" r="10334" b="4709" baseLine="4699" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="9739" t="4555" r="10334" b="4709">0.9134</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="middle">

<rightBorder type="single" width="10"/>

<para l="6283" t="4819" r="7162" b="5006" alignment="left" li="127" lsp="exactly" lspExact="271" language="en">

<ln l="6283" t="4819" r="7162" b="5006" baseLine="4989">

<wd l="6283" t="4819" r="7162" b="5006"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="8">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Arial" fontFamily="roman" fontPitch="variable" spacing="8">toM</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="8">ABB</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="850" fontFace="Arial" fontFamily="swiss" fontPitch="variable" spacing="8"><nl orig="true"/>

</run>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<para l="7930" t="4819" r="8510" b="4978" alignment="left" spaceAfter="24" lsp="exactly" lspExact="269" language="en">

<tabs position="7930"/>

<ln l="7930" t="4819" r="8510" b="4978" baseLine="4973" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="7930" t="4819" r="8510" b="4978">0.8523</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="middle">

<para l="8875" t="4819" r="9461" b="4978" alignment="left" spaceAfter="24" lsp="exactly" lspExact="269" language="en">

<tabs position="8875"/>

<ln l="8875" t="4819" r="9461" b="4978" baseLine="4973" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="8875" t="4819" r="9461" b="4978">0.9699</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="middle">

<para l="9739" t="4824" r="10325" b="4978" alignment="left" spaceAfter="24" lsp="exactly" lspExact="269" language="en">

<tabs position="9739"/>

<ln l="9739" t="4824" r="10325" b="4978" baseLine="4973" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="9739" t="4824" r="10325" b="4978">0.9073</wd>

</ln>

</para>

</cell>

</table>

<para l="6144" t="5270" r="10512" b="5741" alignment="justified" lsp="exactly" lspExact="271" language="en">

<ln l="6144" t="5270" r="10512" b="5472" baseLine="5419" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="5270" r="6624" b="5429">Table</wd>

<space/>

<wd l="6730" t="5270" r="6874" b="5429">5:</wd>

<space/>

<wd l="7046" t="5270" r="7786" b="5429">Detailed</wd>

<space/>

<wd l="7886" t="5270" r="8986" b="5472">performance</wd>

<space/>

<wd l="9091" t="5323" r="9302" b="5429">on</wd>

<space/>

<wd l="9398" t="5270" r="9667" b="5429">the</wd>

<space/>

<wd l="9768" t="5270" r="10512" b="5429">different</wd>

<space/>

</ln>

<ln l="6149" t="5539" r="9158" b="5741" baseLine="5693" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="5539" r="7032" b="5698">correction</wd>

<space/>

<wd l="7085" t="5563" r="7536" b="5741">types</wd>

<space/>

<wd l="7603" t="5592" r="7819" b="5698">on</wd>

<space/>

<wd l="7872" t="5539" r="8136" b="5698">the</wd>

<space/>

<wd l="8194" t="5563" r="8501" b="5698">test</wd>

<space/>

<wd l="8558" t="5539" r="9158" b="5698">dataset</wd>

</ln>

</para>

<para l="6144" t="6355" r="10512" b="7867" alignment="justified" spaceBefore="539" lsp="exactly" lspExact="271" language="en">

<ln l="6144" t="6355" r="10498" b="6557" baseLine="6504" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6144" t="6355" r="7166" b="6557">placeholder</wd>

<space/>

<wd l="7253" t="6355" r="7512" b="6514">for</wd>

<space/>

<wd l="7608" t="6408" r="7910" b="6557">any</wd>

<space/>

<wd l="8006" t="6355" r="8486" b="6514">token</wd>

<space/>

<wd l="8582" t="6379" r="8736" b="6514">at</wd>

<space/>

<wd l="8827" t="6355" r="9091" b="6514">the</wd>

<space/>

<wd l="9192" t="6355" r="9662" b="6557">given</wd>

<space/>

<wd l="9754" t="6355" r="10498" b="6557">position.</wd>

<space/>

</ln>

<ln l="6144" t="6624" r="10502" b="6826" baseLine="6778" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6144" t="6624" r="6485" b="6782">The</wd>

<space/>

<wd l="6538" t="6624" r="6917" b="6826">only</wd>

<space/>

<wd l="6970" t="6677" r="7339" b="6782">case</wd>

<space/>

<wd l="7387" t="6624" r="7862" b="6782">when</wd>

<space/>

<wd l="7906" t="6677" r="8160" b="6782">we</wd>

<space/>

<wd l="8213" t="6624" r="8486" b="6782">did</wd>

<space/>

<wd l="8530" t="6648" r="8813" b="6782">not</wd>

<space/>

<wd l="8861" t="6624" r="9461" b="6782">choose</wd>

<space/>

<wd l="9509" t="6624" r="9773" b="6782">the</wd>

<space/>

<wd l="9821" t="6624" r="10502" b="6782">normal-</wd>

</ln>

<ln l="6144" t="6898" r="10512" b="7099" baseLine="7046" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6144" t="6898" r="6739" b="7056">ization</wd>

<space/>

<wd l="6787" t="6898" r="6979" b="7056">of</wd>

<space/>

<wd l="7013" t="6950" r="7210" b="7056">an</wd>

<space/>

<wd l="7258" t="6902" r="7714" b="7056">OOV</wd>

<space/>

<wd l="7757" t="6898" r="8208" b="7056">word</wd>

<space/>

<wd l="8251" t="6898" r="9106" b="7099">according</wd>

<space/>

<wd l="9149" t="6922" r="9317" b="7056">to</wd>

<space/>

<wd l="9370" t="6902" r="9605" b="7094">(1)</wd>

<space/>

<wd l="9658" t="6950" r="9989" b="7056">was</wd>

<space/>

<wd l="10037" t="6898" r="10512" b="7056">when</wd>

<space/>

</ln>

<ln l="6144" t="7166" r="10512" b="7368" baseLine="7315" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6144" t="7166" r="6581" b="7325">there</wd>

<space/>

<wd l="6658" t="7219" r="6984" b="7325">was</wd>

<space/>

<wd l="7070" t="7219" r="7166" b="7325">a</wd>

<space/>

<wd l="7238" t="7166" r="7829" b="7368">unique</wd>

<space/>

<wd l="7915" t="7166" r="8837" b="7368">suggestion</wd>

<space/>

<wd l="8909" t="7166" r="9168" b="7325">for</wd>

<space/>

<wd l="9245" t="7219" r="9442" b="7325">an</wd>

<space/>

<wd l="9518" t="7171" r="9744" b="7325">IV</wd>

<space/>

<wd l="9821" t="7166" r="10267" b="7325">word</wd>

<space/>

<wd l="10339" t="7166" r="10512" b="7320">in</wd>

<space/>

</ln>

<ln l="6144" t="7440" r="10502" b="7598" baseLine="7589" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6144" t="7440" r="6413" b="7598">the</wd>

<space/>

<wd l="6509" t="7440" r="7723" b="7598">normalization</wd>

<space/>

<wd l="7824" t="7440" r="8827" b="7598">dictionaries</wd>

<space/>

<wd l="8933" t="7493" r="9187" b="7598">we</wd>

<space/>

<wd l="9288" t="7440" r="9758" b="7598">listed</wd>

<space/>

<wd l="9854" t="7440" r="10027" b="7594">in</wd>

<space/>

<wd l="10128" t="7445" r="10502" b="7598">Sec-</wd>

</ln>

<ln l="6144" t="7709" r="6854" b="7867" baseLine="7858" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6144" t="7709" r="6485" b="7867">tion</wd>

<space/>

<wd l="6538" t="7714" r="6854" b="7867">4.1.</wd>

</ln>

</para>

<para l="6144" t="8054" r="10512" b="9840" alignment="justified" spaceBefore="75" fli="216" lsp="exactly" lspExact="271" language="en">

<ln l="6365" t="8054" r="10512" b="8256" baseLine="8203" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6365" t="8054" r="6701" b="8213">The</wd>

<space/>

<wd l="6797" t="8054" r="7896" b="8256">performance</wd>

<space/>

<wd l="7997" t="8054" r="8189" b="8213">of</wd>

<space/>

<wd l="8266" t="8054" r="8534" b="8213">the</wd>

<space/>

<wd l="8626" t="8054" r="9840" b="8213">normalization</wd>

<space/>

<wd l="9936" t="8107" r="10152" b="8213">on</wd>

<space/>

<wd l="10243" t="8054" r="10512" b="8213">the</wd>

<space/>

</ln>

<ln l="6144" t="8323" r="10512" b="8525" baseLine="8477" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="8323" r="6821" b="8525">training</wd>

<space/>

<wd l="6902" t="8323" r="7214" b="8482">and</wd>

<space/>

<wd l="7282" t="8347" r="7589" b="8482">test</wd>

<space/>

<wd l="7666" t="8347" r="8026" b="8510">sets,</wd>

<space/>

<wd l="8122" t="8323" r="8971" b="8525">according</wd>

<space/>

<wd l="9048" t="8347" r="9211" b="8482">to</wd>

<space/>

<wd l="9288" t="8323" r="9552" b="8482">the</wd>

<space/>

<wd l="9634" t="8323" r="10512" b="8482">correction</wd>

<space/>

</ln>

<ln l="6144" t="8597" r="10502" b="8798" baseLine="8746" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="8621" r="6600" b="8798">types</wd>

<space/>

<wd l="6667" t="8650" r="6922" b="8755">we</wd>

<space/>

<wd l="6994" t="8597" r="7632" b="8755">defined</wd>

<space/>

<wd l="7699" t="8650" r="7992" b="8755">can</wd>

<space/>

<wd l="8059" t="8597" r="8266" b="8755">be</wd>

<space/>

<wd l="8328" t="8597" r="8837" b="8755">found</wd>

<space/>

<wd l="8899" t="8597" r="9072" b="8750">in</wd>

<space/>

<wd l="9134" t="8597" r="9610" b="8755">Table</wd>

<space/>

<wd l="9677" t="8602" r="9782" b="8750">4</wd>

<space/>

<wd l="9854" t="8597" r="10162" b="8755">and</wd>

<space/>

<wd l="10224" t="8602" r="10502" b="8755">Ta-</wd>

</ln>

<ln l="6144" t="8866" r="10512" b="9067" baseLine="9019" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="8866" r="6413" b="9024">ble</wd>

<space/>

<wd l="6475" t="8866" r="6619" b="9053">5,</wd>

<space/>

<wd l="6691" t="8866" r="7762" b="9067">respectively.</wd>

<space/>

<wd l="7848" t="8870" r="8323" b="9024">From</wd>

<space/>

<wd l="8376" t="8866" r="8822" b="9024">these</wd>

<space/>

<wd l="8880" t="8866" r="9432" b="9053">tables,</wd>

<space/>

<wd l="9509" t="8918" r="9816" b="9024">one</wd>

<space/>

<wd l="9878" t="8918" r="10171" b="9024">can</wd>

<space/>

<wd l="10243" t="8918" r="10512" b="9024">see</wd>

<space/>

</ln>

<ln l="6144" t="9139" r="10502" b="9298" baseLine="9288" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="9139" r="6475" b="9298">that</wd>

<space/>

<wd l="6523" t="9139" r="6787" b="9298">the</wd>

<space/>

<wd l="6840" t="9163" r="7325" b="9298">worst</wd>

<space/>

<wd l="7373" t="9139" r="7934" b="9298">results</wd>

<space/>

<wd l="7992" t="9192" r="8419" b="9298">were</wd>

<space/>

<wd l="8472" t="9139" r="9221" b="9298">obtained</wd>

<space/>

<wd l="9269" t="9139" r="9528" b="9298">for</wd>

<space/>

<wd l="9576" t="9139" r="9840" b="9298">the</wd>

<space/>

<wd l="9898" t="9192" r="10502" b="9298">correc-</wd>

</ln>

<ln l="6144" t="9408" r="10512" b="9610" baseLine="9562" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="9408" r="6485" b="9566">tion</wd>

<space/>

<wd l="6533" t="9432" r="6907" b="9610">type</wd>

<space/>

<wd l="6960" t="9408" r="7430" b="9566">when</wd>

<space/>

<wd l="7488" t="9461" r="8040" b="9610">spaces</wd>

<space/>

<wd l="8098" t="9461" r="8520" b="9566">were</wd>

<space/>

<wd l="8573" t="9408" r="9298" b="9610">required</wd>

<space/>

<wd l="9346" t="9432" r="9514" b="9566">to</wd>

<space/>

<wd l="9566" t="9408" r="9773" b="9566">be</wd>

<space/>

<wd l="9821" t="9408" r="10512" b="9566">inserted</wd>

<space/>

</ln>

<ln l="6144" t="9682" r="7526" b="9840" baseLine="9830" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="9706" r="6312" b="9840">to</wd>

<space/>

<wd l="6374" t="9734" r="6470" b="9840">a</wd>

<space/>

<wd l="6528" t="9686" r="6979" b="9840">OOV</wd>

<space/>

<wd l="7037" t="9682" r="7526" b="9840">word.</wd>

</ln>

</para>

<para l="6144" t="10022" r="10512" b="12936" alignment="justified" spaceBefore="72" spaceAfter="524" fli="216" lsp="exactly" lspExact="271" language="en">

<ln l="6365" t="10022" r="10502" b="10181" baseLine="10176" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6365" t="10022" r="6744" b="10181">This</wd>

<space/>

<wd l="6821" t="10022" r="6955" b="10181">is</wd>

<space/>

<wd l="7032" t="10022" r="7205" b="10176">in</wd>

<space/>

<wd l="7277" t="10022" r="8251" b="10181">accordance</wd>

<space/>

<wd l="8318" t="10022" r="8707" b="10181">with</wd>

<space/>

<wd l="8774" t="10022" r="9043" b="10181">the</wd>

<space/>

<wd l="9110" t="10022" r="9437" b="10181">fact</wd>

<space/>

<wd l="9504" t="10022" r="9830" b="10181">that</wd>

<space/>

<wd l="9902" t="10075" r="10195" b="10181">our</wd>

<space/>

<wd l="10267" t="10075" r="10502" b="10181">se-</wd>

</ln>

<ln l="6149" t="10296" r="10507" b="10498" baseLine="10445" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="10349" r="6763" b="10498">quence</wd>

<space/>

<wd l="6840" t="10296" r="7382" b="10454">model</wd>

<space/>

<wd l="7464" t="10296" r="8213" b="10454">obtained</wd>

<space/>

<wd l="8285" t="10296" r="8549" b="10454">the</wd>

<space/>

<wd l="8626" t="10296" r="9192" b="10454">lowest</wd>

<space/>

<wd l="9274" t="10349" r="9802" b="10454">scores</wd>

<space/>

<wd l="9888" t="10296" r="10507" b="10498">exactly</wd>

<space/>

</ln>

<ln l="6149" t="10565" r="10512" b="10752" baseLine="10718" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="10618" r="6365" b="10723">on</wd>

<space/>

<wd l="6437" t="10565" r="6744" b="10723">this</wd>

<space/>

<wd l="6830" t="10565" r="7219" b="10723">kind</wd>

<space/>

<wd l="7296" t="10565" r="7488" b="10723">of</wd>

<space/>

<wd l="7550" t="10565" r="8558" b="10723">corrections.</wd>

<space/>

<wd l="8698" t="10570" r="9518" b="10752">However,</wd>

<space/>

<wd l="9614" t="10565" r="9926" b="10723">due</wd>

<space/>

<wd l="9998" t="10589" r="10166" b="10723">to</wd>

<space/>

<wd l="10243" t="10565" r="10512" b="10723">the</wd>

<space/>

</ln>

<ln l="6144" t="10838" r="10498" b="11040" baseLine="10987" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="10838" r="6470" b="10997">fact</wd>

<space/>

<wd l="6547" t="10838" r="6878" b="10997">that</wd>

<space/>

<wd l="6955" t="10838" r="7262" b="10997">this</wd>

<space/>

<wd l="7354" t="10891" r="7776" b="10997">error</wd>

<space/>

<wd l="7858" t="10862" r="8597" b="11040">category</wd>

<space/>

<wd l="8678" t="10838" r="8818" b="10997">is</wd>

<space/>

<wd l="8904" t="10838" r="9173" b="10997">the</wd>

<space/>

<wd l="9250" t="10838" r="9653" b="10997">least</wd>

<space/>

<wd l="9730" t="10838" r="10498" b="11040">frequent,</wd>

<space/>

</ln>

<ln l="6144" t="11107" r="10512" b="11309" baseLine="11261" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="11107" r="6413" b="11266">the</wd>

<space/>

<wd l="6494" t="11107" r="6989" b="11266">lower</wd>

<space/>

<wd l="7075" t="11160" r="7603" b="11266">scores</wd>

<space/>

<wd l="7694" t="11160" r="7910" b="11266">on</wd>

<space/>

<wd l="7987" t="11107" r="8318" b="11266">that</wd>

<space/>

<wd l="8400" t="11131" r="9139" b="11309">category</wd>

<space/>

<wd l="9230" t="11107" r="9614" b="11266">does</wd>

<space/>

<wd l="9706" t="11131" r="9984" b="11266">not</wd>

<space/>

<wd l="10061" t="11107" r="10512" b="11266">harm</wd>

<space/>

</ln>

<ln l="6144" t="11381" r="10512" b="11582" baseLine="11530" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="11381" r="6475" b="11539">that</wd>

<space/>

<wd l="6533" t="11381" r="7018" b="11539">much</wd>

<space/>

<wd l="7085" t="11434" r="7373" b="11539">our</wd>

<space/>

<wd l="7435" t="11381" r="8026" b="11539">overall</wd>

<space/>

<wd l="8088" t="11381" r="9192" b="11582">performance</wd>

<space/>

<wd l="9254" t="11434" r="9427" b="11539">as</wd>

<space/>

<wd l="9499" t="11434" r="9792" b="11539">can</wd>

<space/>

<wd l="9859" t="11381" r="10061" b="11539">be</wd>

<space/>

<wd l="10133" t="11434" r="10512" b="11539">seen</wd>

<space/>

</ln>

<ln l="6144" t="11650" r="10498" b="11851" baseLine="11803" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="11650" r="6317" b="11803">in</wd>

<space/>

<wd l="6389" t="11650" r="6869" b="11808">Table</wd>

<space/>

<wd l="6950" t="11650" r="7051" b="11808">6</wd>

<space/>

<wd l="7133" t="11650" r="7387" b="11808">for</wd>

<space/>

<wd l="7459" t="11650" r="7848" b="11808">both</wd>

<space/>

<wd l="7925" t="11650" r="8189" b="11808">the</wd>

<space/>

<wd l="8266" t="11650" r="8942" b="11851">training</wd>

<space/>

<wd l="9024" t="11650" r="9336" b="11808">and</wd>

<space/>

<wd l="9413" t="11674" r="9715" b="11808">test</wd>

<space/>

<wd l="9797" t="11702" r="10498" b="11851">corpora.</wd>

<space/>

</ln>

<ln l="6144" t="11923" r="10512" b="12082" baseLine="12072" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="11923" r="6485" b="12082">The</wd>

<space/>

<wd l="6571" t="11923" r="7133" b="12082">results</wd>

<space/>

<wd l="7238" t="11923" r="7795" b="12082">shown</wd>

<space/>

<wd l="7877" t="11923" r="8050" b="12077">in</wd>

<space/>

<wd l="8136" t="11923" r="8611" b="12082">Table</wd>

<space/>

<wd l="8707" t="11923" r="8808" b="12082">6</wd>

<space/>

<wd l="8899" t="11923" r="9245" b="12082">also</wd>

<space/>

<wd l="9336" t="11923" r="10094" b="12082">illustrate</wd>

<space/>

<wd l="10186" t="11923" r="10512" b="12082">that</wd>

<space/>

</ln>

<ln l="6149" t="12192" r="10502" b="12394" baseLine="12341" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="12245" r="6442" b="12350">our</wd>

<space/>

<wd l="6499" t="12192" r="7296" b="12394">approach</wd>

<space/>

<wd l="7368" t="12245" r="7882" b="12350">seems</wd>

<space/>

<wd l="7949" t="12216" r="8117" b="12350">to</wd>

<space/>

<wd l="8184" t="12192" r="9077" b="12394">generalize</wd>

<space/>

<wd l="9134" t="12192" r="9557" b="12379">well,</wd>

<space/>

<wd l="9634" t="12245" r="9802" b="12350">as</wd>

<space/>

<wd l="9869" t="12192" r="10306" b="12350">there</wd>

<space/>

<wd l="10363" t="12192" r="10502" b="12350">is</wd>

<space/>

</ln>

<ln l="6149" t="12466" r="10512" b="12667" baseLine="12614" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="12518" r="6245" b="12624">a</wd>

<space/>

<wd l="6293" t="12466" r="6754" b="12624">small</wd>

<space/>

<wd l="6806" t="12518" r="7109" b="12667">gap</wd>

<space/>

<wd l="7157" t="12466" r="7886" b="12624">between</wd>

<space/>

<wd l="7925" t="12466" r="8194" b="12624">the</wd>

<space/>

<wd l="8237" t="12466" r="9413" b="12667">performances</wd>

<space/>

<wd l="9470" t="12466" r="10253" b="12624">observed</wd>

<space/>

<wd l="10296" t="12518" r="10512" b="12624">on</wd>

<space/>

</ln>

<ln l="6144" t="12734" r="9883" b="12936" baseLine="12883" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6144" t="12734" r="6413" b="12893">the</wd>

<space/>

<wd l="6466" t="12734" r="7142" b="12936">training</wd>

<space/>

<wd l="7205" t="12734" r="7517" b="12893">and</wd>

<space/>

<wd l="7570" t="12758" r="7877" b="12893">test</wd>

<space/>

<wd l="7939" t="12758" r="8246" b="12893">sets</wd>

<space/>

<wd l="8314" t="12734" r="8506" b="12893">of</wd>

<space/>

<wd l="8544" t="12734" r="8813" b="12893">the</wd>

<space/>

<wd l="8875" t="12734" r="9437" b="12893">shared</wd>

<space/>

<wd l="9490" t="12734" r="9883" b="12893">task.</wd>

</ln>

</para>

<table l="6898" t="13468" r="9768" b="14584" alignment="left" li="756" ri="756" spaceAfter="144">

<gridTable>

<gridCol>1042</gridCol>

<gridCol>979</gridCol>

<gridCol>849</gridCol>

<gridRow>288</gridRow>

<gridRow>250</gridRow>

<gridRow>274</gridRow>

<gridRow>304</gridRow>

</gridTable>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="top">

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="6898" t="13468" r="7940" b="13756" language="en">

<ln l="0" t="0" r="0" b="0" baseLine="0" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable">

<nl orig="true"/>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="8054" t="13517" r="8798" b="13718" alignment="centered" spaceAfter="25" lsp="exactly" lspExact="259" language="en">

<ln l="8054" t="13517" r="8798" b="13718" baseLine="13670" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="8054" t="13517" r="8798" b="13718">Training</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<bottomBorder type="single" width="10"/>

<para l="9158" t="13522" r="9523" b="13675" alignment="centered" spaceAfter="25" lsp="exactly" lspExact="259" language="en">

<ln l="9158" t="13522" r="9523" b="13675" baseLine="13670" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="9158" t="13522" r="9523" b="13675">Test</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<para l="7018" t="13795" r="7819" b="13997" alignment="left" li="120" lsp="exactly" lspExact="240" language="en">

<ln l="7018" t="13795" r="7819" b="13997" baseLine="13949" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="7018" t="13795" r="7819" b="13997">precision</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<para l="8131" t="13800" r="8717" b="13954" alignment="left" lsp="exactly" lspExact="240" language="en">

<tabs position="8131"/>

<ln l="8131" t="13800" r="8717" b="13954" baseLine="13949" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="8131" t="13800" r="8717" b="13954">0.8703</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<para l="9043" t="13795" r="9634" b="13954" alignment="left" lsp="exactly" lspExact="240" language="en">

<tabs position="9043"/>

<ln l="9043" t="13795" r="9634" b="13954" baseLine="13949" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="9043" t="13795" r="9634" b="13954">0.8606</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<rightBorder type="single" width="10"/>

<para l="7018" t="14069" r="7493" b="14227" alignment="left" li="120" lsp="exactly" lspExact="264" language="en">

<ln l="7018" t="14069" r="7493" b="14227" baseLine="14218" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="7018" t="14069" r="7493" b="14227">recall</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<para l="8131" t="14069" r="8717" b="14227" alignment="left" lsp="exactly" lspExact="264" language="en">

<tabs position="8131"/>

<ln l="8131" t="14069" r="8717" b="14227" baseLine="14218" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="8131" t="14069" r="8717" b="14227">0.7673</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="middle">

<para l="9043" t="14069" r="9634" b="14227" alignment="left" lsp="exactly" lspExact="264" language="en">

<tabs position="9043"/>

<ln l="9043" t="14069" r="9634" b="14227" baseLine="14218" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="9043" t="14069" r="9634" b="14227">0.7564</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<rightBorder type="single" width="10"/>

<para l="7018" t="14342" r="7224" b="14491" alignment="left" li="120" spaceAfter="24" lsp="exactly" lspExact="268" language="en">

<ln l="7018" t="14342" r="7224" b="14491" baseLine="14491" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-9">

<wd l="7018" t="14342" r="7224" b="14491">F1</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<para l="8131" t="14338" r="8726" b="14496" alignment="left" spaceAfter="19" lsp="exactly" lspExact="254" language="en">

<tabs position="8131"/>

<ln l="8131" t="14338" r="8726" b="14496" baseLine="14491" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="8131" t="14338" r="8726" b="14496">0.8156</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="middle">

<para l="9043" t="14338" r="9634" b="14496" alignment="left" spaceAfter="19" lsp="exactly" lspExact="254" language="en">

<tabs position="9043"/>

<ln l="9043" t="14338" r="9634" b="14496" baseLine="14491" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="9043" t="14338" r="9634" b="14496">0.8052</wd>

</ln>

</para>

</cell>

</table>

<para l="6144" t="14789" r="10512" b="15259" alignment="justified" spaceAfter="111" lsp="exactly" lspExact="271" language="en">

<ln l="6144" t="14789" r="10512" b="14990" baseLine="14938" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="14789" r="6624" b="14947">Table</wd>

<space/>

<wd l="6677" t="14789" r="6826" b="14947">6:</wd>

<space/>

<wd l="6912" t="14789" r="7555" b="14947">Overall</wd>

<space/>

<wd l="7608" t="14789" r="8707" b="14990">performance</wd>

<space/>

<wd l="8760" t="14789" r="8952" b="14947">of</wd>

<space/>

<wd l="8990" t="14842" r="9278" b="14947">our</wd>

<space/>

<wd l="9336" t="14813" r="9931" b="14990">system</wd>

<space/>

<wd l="9984" t="14842" r="10200" b="14947">on</wd>

<space/>

<wd l="10243" t="14789" r="10512" b="14947">the</wd>

<space/>

</ln>

<ln l="6144" t="15058" r="7925" b="15259" baseLine="15211" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="15058" r="6821" b="15259">training</wd>

<space/>

<wd l="6883" t="15058" r="7195" b="15216">and</wd>

<space/>

<wd l="7248" t="15082" r="7555" b="15216">test</wd>

<space/>

<wd l="7618" t="15082" r="7925" b="15216">sets</wd>

</ln>

</para>

</column>

</section>

<dd l="1440" t="15746" r="10524" b="15975">

<para l="5771" t="15792" r="6191" b="15946" alignment="centered" lsp="exactly" lspExact="223" language="en">

<ln l="5837" t="15792" r="6125" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="950" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="27">

<wd l="5837" t="15792" r="6125" b="15946">123</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4318.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1440" marginTop="1260" marginRight="1373" marginBottom="1292" offsetX="-24" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1440" t="1260" r="10536" b="15318">

<column l="1440" t="1260" r="5842" b="15318">

<para l="1445" t="1306" r="2938" b="1478" alignment="left" spaceBefore="3" lsp="exactly" lspExact="273" language="en">

<ln l="1445" t="1306" r="2938" b="1478" baseLine="1474" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="12">

<wd l="1445" t="1310" r="1555" b="1478">5</wd>

<space/>

<wd l="1810" t="1306" r="2938" b="1478">Conclusion</wd>

</ln>

</para>

<para l="1440" t="1752" r="5813" b="4666" alignment="justified" spaceBefore="156" lsp="exactly" lspExact="271" language="en">

<ln l="1440" t="1752" r="5803" b="1954" baseLine="1901" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="1757" r="1622" b="1906">In</wd>

<space/>

<wd l="1699" t="1752" r="2006" b="1910">this</wd>

<space/>

<wd l="2093" t="1805" r="2611" b="1954">paper,</wd>

<space/>

<wd l="2707" t="1805" r="2957" b="1910">we</wd>

<space/>

<wd l="3038" t="1752" r="3970" b="1910">introduced</wd>

<space/>

<wd l="4051" t="1805" r="4344" b="1910">our</wd>

<space/>

<wd l="4421" t="1752" r="5218" b="1954">approach</wd>

<space/>

<wd l="5290" t="1776" r="5458" b="1910">to</wd>

<space/>

<wd l="5539" t="1752" r="5803" b="1910">the</wd>

<space/>

</ln>

<ln l="1440" t="2021" r="5808" b="2222" baseLine="2174" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="2021" r="2016" b="2179">lexical</wd>

<space/>

<wd l="2059" t="2021" r="3274" b="2179">normalization</wd>

<space/>

<wd l="3317" t="2021" r="3509" b="2179">of</wd>

<space/>

<wd l="3538" t="2021" r="4205" b="2222">English</wd>

<space/>

<wd l="4248" t="2045" r="4795" b="2179">tweets</wd>

<space/>

<wd l="4843" t="2021" r="5174" b="2179">that</wd>

<space/>

<wd l="5213" t="2021" r="5808" b="2179">ranked</wd>

<space/>

</ln>

<ln l="1450" t="2294" r="5808" b="2496" baseLine="2443" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1450" t="2294" r="2050" b="2453">second</wd>

<space/>

<wd l="2098" t="2318" r="2251" b="2453">at</wd>

<space/>

<wd l="2299" t="2294" r="2563" b="2453">the</wd>

<space/>

<wd l="2621" t="2294" r="3182" b="2453">shared</wd>

<space/>

<wd l="3230" t="2294" r="3581" b="2453">task</wd>

<space/>

<wd l="3634" t="2347" r="4219" b="2496">among</wd>

<space/>

<wd l="4267" t="2294" r="4531" b="2453">the</wd>

<space/>

<wd l="4579" t="2294" r="5808" b="2453">unconstrained</wd>

<space/>

</ln>

<ln l="1450" t="2563" r="5794" b="2765" baseLine="2717" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1450" t="2563" r="2549" b="2722">submissions.</wd>

<space/>

<wd l="2755" t="2568" r="3096" b="2722">Our</wd>

<space/>

<wd l="3187" t="2563" r="4138" b="2722">framework</wd>

<space/>

<wd l="4238" t="2563" r="4574" b="2722">first</wd>

<space/>

<wd l="4666" t="2563" r="5448" b="2765">performs</wd>

<space/>

<wd l="5558" t="2616" r="5794" b="2722">se-</wd>

</ln>

<ln l="1445" t="2837" r="5794" b="3038" baseLine="2986" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="2890" r="2059" b="3038">quence</wd>

<space/>

<wd l="2122" t="2837" r="2818" b="3038">labeling</wd>

<space/>

<wd l="2890" t="2890" r="3269" b="2995">over</wd>

<space/>

<wd l="3326" t="2837" r="3595" b="2995">the</wd>

<space/>

<wd l="3658" t="2837" r="4214" b="2995">tokens</wd>

<space/>

<wd l="4291" t="2837" r="4483" b="2995">of</wd>

<space/>

<wd l="4536" t="2890" r="4632" b="2995">a</wd>

<space/>

<wd l="4690" t="2861" r="5165" b="2995">tweet</wd>

<space/>

<wd l="5222" t="2861" r="5390" b="2995">to</wd>

<space/>

<wd l="5453" t="2890" r="5794" b="3038">pre-</wd>

</ln>

<ln l="1445" t="3106" r="5813" b="3264" baseLine="3259">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="1445" t="3106" r="1771" b="3264">dict</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="1814" t="3106" r="2333" b="3264">which</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="2386" t="3106" r="2942" b="3264">tokens</wd>

<space/>

<wd l="3000" t="3106" r="3413" b="3264">need</wd>

<space/>

<wd l="3461" t="3130" r="3629" b="3264">to</wd>

<space/>

<wd l="3682" t="3106" r="3888" b="3264">be</wd>

<space/>

<wd l="3941" t="3106" r="4747" b="3264">corrected</wd>

<space/>

<wd l="4800" t="3106" r="5112" b="3264">and</wd>

<space/>

<wd l="5160" t="3106" r="5333" b="3259">in</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="5381" t="3106" r="5813" b="3264">what</wd>

<space/>

</run>

</ln>

<ln l="1440" t="3379" r="5794" b="3581" baseLine="3528">

<wd l="1440" t="3432" r="1834" b="3581"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">way</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><space/>

<wd l="2035" t="3379" r="2414" b="3538">This</wd>

<space/>

<wd l="2525" t="3403" r="2866" b="3581">step</wd>

<space/>

<wd l="2962" t="3379" r="3101" b="3538">is</wd>

<space/>

<wd l="3202" t="3379" r="3974" b="3538">followed</wd>

<space/>

<wd l="4066" t="3379" r="4282" b="3581">by</wd>

<space/>

<wd l="4382" t="3379" r="5266" b="3538">correction</wd>

<space/>

<wd l="5357" t="3403" r="5794" b="3581">type-</wd>

</run>

</ln>

<ln l="1450" t="3648" r="5808" b="3850" baseLine="3802" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1450" t="3648" r="2194" b="3806">sensitive</wd>

<space/>

<wd l="2261" t="3648" r="3091" b="3806">candidate</wd>

<space/>

<wd l="3158" t="3672" r="3394" b="3806">set</wd>

<space/>

<wd l="3456" t="3648" r="4416" b="3850">generation,</wd>

<space/>

<wd l="4488" t="3648" r="4915" b="3806">from</wd>

<space/>

<wd l="4973" t="3648" r="5506" b="3806">which</wd>

<space/>

<wd l="5573" t="3672" r="5808" b="3806">set</wd>

<space/>

</ln>

<ln l="1440" t="3922" r="5808" b="4123" baseLine="4070" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="3922" r="1704" b="4080">the</wd>

<space/>

<wd l="1762" t="3946" r="2189" b="4080">most</wd>

<space/>

<wd l="2242" t="3922" r="2731" b="4123">likely</wd>

<space/>

<wd l="2794" t="3926" r="3019" b="4080">IV</wd>

<space/>

<wd l="3077" t="3922" r="4291" b="4080">normalization</wd>

<space/>

<wd l="4349" t="3922" r="4541" b="4080">of</wd>

<space/>

<wd l="4584" t="3974" r="4786" b="4080">an</wd>

<space/>

<wd l="4848" t="3926" r="5299" b="4080">OOV</wd>

<space/>

<wd l="5362" t="3922" r="5808" b="4080">word</wd>

<space/>

</ln>

<ln l="1440" t="4190" r="5803" b="4392" baseLine="4339" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="4190" r="1579" b="4349">is</wd>

<space/>

<wd l="1642" t="4190" r="2338" b="4349">selected</wd>

<space/>

<wd l="2381" t="4190" r="2597" b="4392">by</wd>

<space/>

<wd l="2650" t="4190" r="3418" b="4392">querying</wd>

<space/>

<wd l="3470" t="4243" r="3667" b="4349">an</wd>

<space/>

<wd l="3725" t="4190" r="4594" b="4392">efficiently</wd>

<space/>

<wd l="4642" t="4190" r="5328" b="4349">indexed</wd>

<space/>

<wd l="5371" t="4190" r="5803" b="4392">large</wd>

<space/>

</ln>

<ln l="1440" t="4464" r="4344" b="4666" baseLine="4613" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1440" t="4517" r="2074" b="4666">n-gram</wd>

<space/>

<wd l="2131" t="4464" r="2731" b="4622">dataset</wd>

<space/>

<wd l="2789" t="4464" r="2981" b="4622">of</wd>

<space/>

<wd l="3019" t="4464" r="3691" b="4666">English</wd>

<space/>

<wd l="3744" t="4488" r="4344" b="4622">tweets.</wd>

</ln>

</para>

<para l="1445" t="5194" r="2544" b="5366" alignment="left" spaceBefore="473" lsp="exactly" lspExact="273" language="en">

<ln l="1445" t="5194" r="2544" b="5366" baseLine="5357" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="5194" r="2544" b="5366">References</wd>

</ln>

</para>

<para l="1440" t="5582" r="5813" b="7085" alignment="justified" li="216" spaceBefore="139" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="1440" t="5582" r="5798" b="5770" baseLine="5722" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1440" t="5582" r="2117" b="5770">Timothy</wd>

<space/>

<wd l="2261" t="5582" r="2981" b="5750">Baldwin,</wd>

<space/>

<wd l="3154" t="5582" r="3624" b="5726">Marie</wd>

<space/>

<wd l="3778" t="5582" r="4546" b="5726">Catherine</wd>

<space/>

<wd l="4694" t="5582" r="4877" b="5726">de</wd>

<space/>

<wd l="5021" t="5582" r="5798" b="5750">Marneffe,</wd>

<space/>

</ln>

<ln l="1656" t="5803" r="5803" b="5990" baseLine="5942" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="5808" r="1886" b="5947">Bo</wd>

<space/>

<wd l="1982" t="5808" r="2357" b="5971">Han,</wd>

<space/>

<wd l="2462" t="5808" r="3442" b="5990">Young-Bum</wd>

<space/>

<wd l="3533" t="5803" r="3926" b="5971">Kim,</wd>

<space/>

<wd l="4037" t="5803" r="4421" b="5947">Alan</wd>

<space/>

<wd l="4512" t="5803" r="5002" b="5971">Ritter,</wd>

<space/>

<wd l="5117" t="5803" r="5400" b="5947">and</wd>

<space/>

<wd l="5491" t="5803" r="5803" b="5947">Wei</wd>

<space/>

</ln>

<ln l="1656" t="6024" r="5808" b="6206" baseLine="6158" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="6029" r="1939" b="6168">Xu.</wd>

<space/>

<wd l="2083" t="6024" r="2515" b="6168">2015.</wd>

<space/>

<wd l="2664" t="6024" r="3211" b="6168">Shared</wd>

<space/>

<wd l="3278" t="6024" r="3672" b="6168">tasks</wd>

<space/>

<wd l="3749" t="6024" r="3922" b="6168">of</wd>

<space/>

<wd l="3979" t="6024" r="4219" b="6168">the</wd>

<space/>

<wd l="4291" t="6024" r="4680" b="6168">2015</wd>

<space/>

<wd l="4757" t="6024" r="5534" b="6206">workshop</wd>

<space/>

<wd l="5611" t="6067" r="5808" b="6168">on</wd>

<space/>

</ln>

<ln l="1656" t="6240" r="5798" b="6427" baseLine="6379" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="6240" r="2088" b="6427">noisy</wd>

<space/>

<wd l="2174" t="6240" r="3346" b="6427">user-generated</wd>

<space/>

<wd l="3427" t="6259" r="3763" b="6384">text:</wd>

<space/>

<wd l="3907" t="6240" r="4478" b="6384">Twitter</wd>

<space/>

<wd l="4565" t="6240" r="5088" b="6384">lexical</wd>

<space/>

<wd l="5174" t="6240" r="5798" b="6384">normal-</wd>

</ln>

<ln l="1656" t="6461" r="5798" b="6648" baseLine="6595">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1656" t="6461" r="2203" b="6605">ization</wd>

<space/>

<wd l="2275" t="6461" r="2558" b="6605">and</wd>

<space/>

<wd l="2630" t="6461" r="3163" b="6605">named</wd>

<space/>

<wd l="3235" t="6461" r="3682" b="6648">entity</wd>

<space/>

<wd l="3754" t="6461" r="4699" b="6648">recognition.</wd>

<space/>

<wd l="4848" t="6466" r="5016" b="6600">In</wd>

<space/>

</run>

<wd l="5083" t="6461" r="5798" b="6605" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Proceed-</wd>

</ln>

<ln l="1666" t="6682" r="5813" b="6864" baseLine="6816" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1666" t="6686" r="1992" b="6864">ings</wd>

<space/>

<wd l="2054" t="6682" r="2237" b="6864">of</wd>

<space/>

<wd l="2266" t="6682" r="2506" b="6826">the</wd>

<space/>

<wd l="2573" t="6682" r="3350" b="6864">Workshop</wd>

<space/>

<wd l="3418" t="6730" r="3610" b="6826">on</wd>

<space/>

<wd l="3667" t="6686" r="4123" b="6864">Noisy</wd>

<space/>

<wd l="4205" t="6682" r="5434" b="6864">User-generated</wd>

<space/>

<wd l="5496" t="6686" r="5813" b="6826">Text</wd>

<space/>

</ln>

<ln l="1661" t="6898" r="4094" b="7085" baseLine="7037">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1661" t="6902" r="2299" b="7075">(WNUT</wd>

<space/>

</run>

<wd l="2328" t="6898" r="2837" b="7075"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">2015)</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="2894" t="6898" r="3523" b="7085">Beijing,</wd>

<space/>

<wd l="3586" t="6898" r="4094" b="7042">China.</wd>

</run>

</ln>

</para>

<para l="1440" t="7315" r="5837" b="8818" alignment="justified" li="216" spaceBefore="197" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="1440" t="7315" r="5808" b="7502" baseLine="7454" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="7315" r="1958" b="7459">Hakan</wd>

<space/>

<wd l="2021" t="7315" r="2573" b="7502">Ceylan</wd>

<space/>

<wd l="2635" t="7315" r="2923" b="7459">and</wd>

<space/>

<wd l="2976" t="7315" r="3389" b="7459">Rada</wd>

<space/>

<wd l="3442" t="7315" r="4219" b="7459">Mihalcea.</wd>

<space/>

<wd l="4325" t="7315" r="4757" b="7459">2011.</wd>

<space/>

<wd l="4858" t="7320" r="5102" b="7454">An</wd>

<space/>

<wd l="5160" t="7315" r="5808" b="7459">efficient</wd>

<space/>

</ln>

<ln l="1656" t="7536" r="5837" b="7723" baseLine="7670">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1656" t="7536" r="2251" b="7680">indexer</wd>

<space/>

<wd l="2299" t="7536" r="2534" b="7680">for</wd>

<space/>

<wd l="2587" t="7536" r="2976" b="7723">large</wd>

<space/>

<wd l="3034" t="7579" r="3610" b="7723">n-gram</wd>

<space/>

<wd l="3662" t="7579" r="4306" b="7718">corpora.</wd>

<space/>

<wd l="4392" t="7541" r="4560" b="7675">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="4613" t="7536" r="5597" b="7718">Proceedings</wd>

<space/>

<wd l="5654" t="7536" r="5837" b="7718">of</wd>

<space/>

</run>

</ln>

<ln l="1661" t="7757" r="5798" b="7939" baseLine="7891" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1661" t="7757" r="1901" b="7901">the</wd>

<space/>

<wd l="1939" t="7757" r="2290" b="7901">49th</wd>

<space/>

<wd l="2323" t="7757" r="2914" b="7901">Annual</wd>

<space/>

<wd l="2947" t="7762" r="3600" b="7939">Meeting</wd>

<space/>

<wd l="3648" t="7757" r="3830" b="7939">of</wd>

<space/>

<wd l="3845" t="7757" r="4080" b="7901">the</wd>

<space/>

<wd l="4109" t="7762" r="5050" b="7901">Association</wd>

<space/>

<wd l="5059" t="7757" r="5333" b="7939">for</wd>

<space/>

<wd l="5371" t="7762" r="5798" b="7901">Com-</wd>

</ln>

<ln l="1637" t="7973" r="5798" b="8155" baseLine="8112" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1637" t="7973" r="2482" b="8155">putational</wd>

<space/>

<wd l="2525" t="7978" r="3461" b="8155">Linguistics:</wd>

<space/>

<wd l="3533" t="7978" r="4118" b="8117">Human</wd>

<space/>

<wd l="4171" t="7978" r="4963" b="8155">Language</wd>

<space/>

<wd l="5030" t="7973" r="5798" b="8117">Technolo-</wd>

</ln>

<ln l="1656" t="8194" r="5808" b="8381" baseLine="8328">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1656" t="8198" r="2035" b="8376">gies:</wd>

<space/>

<wd l="2102" t="8198" r="2736" b="8376">Systems</wd>

<space/>

</run>

<wd l="2770" t="8198" r="4090" b="8362"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">Demonstrations</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="4142" t="8198" r="4507" b="8333">HLT</wd>

<space/>

<wd l="4565" t="8194" r="4862" b="8362">’11,</wd>

<space/>

<wd l="4910" t="8237" r="5357" b="8381">pages</wd>

<space/>

<wd l="5424" t="8194" r="5808" b="8338">103–</wd>

<space/>

</run>

</ln>

<ln l="1675" t="8414" r="5798" b="8602" baseLine="8549" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1675" t="8414" r="2002" b="8582">108,</wd>

<space/>

<wd l="2083" t="8414" r="3091" b="8602">Stroudsburg,</wd>

<space/>

<wd l="3168" t="8419" r="3451" b="8582">PA,</wd>

<space/>

<wd l="3523" t="8414" r="3960" b="8558">USA.</wd>

<space/>

<wd l="4042" t="8414" r="4982" b="8558">Association</wd>

<space/>

<wd l="5050" t="8414" r="5285" b="8558">for</wd>

<space/>

<wd l="5357" t="8414" r="5798" b="8558">Com-</wd>

</ln>

<ln l="1656" t="8630" r="3427" b="8818" baseLine="8770" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1656" t="8630" r="2453" b="8813">putational</wd>

<space/>

<wd l="2506" t="8630" r="3427" b="8818">Linguistics.</wd>

</ln>

</para>

<para l="1440" t="9048" r="5813" b="10330" alignment="justified" li="216" spaceBefore="198" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="1440" t="9048" r="5808" b="9235" baseLine="9182" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1440" t="9053" r="1853" b="9192">Leon</wd>

<space/>

<wd l="1910" t="9048" r="2861" b="9235">Derczynski,</wd>

<space/>

<wd l="2938" t="9048" r="3322" b="9192">Alan</wd>

<space/>

<wd l="3384" t="9048" r="3874" b="9216">Ritter,</wd>

<space/>

<wd l="3950" t="9048" r="4306" b="9192">Sam</wd>

<space/>

<wd l="4368" t="9048" r="4853" b="9216">Clark,</wd>

<space/>

<wd l="4930" t="9048" r="5213" b="9192">and</wd>

<space/>

<wd l="5280" t="9048" r="5808" b="9192">Kalina</wd>

<space/>

</ln>

<ln l="1656" t="9269" r="5803" b="9456" baseLine="9403" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="9269" r="2539" b="9413">Bontcheva.</wd>

<space/>

<wd l="2722" t="9269" r="3154" b="9413">2013.</wd>

<space/>

<wd l="3326" t="9269" r="3898" b="9413">Twitter</wd>

<space/>

<wd l="3979" t="9269" r="5131" b="9451">part-of-speech</wd>

<space/>

<wd l="5208" t="9269" r="5803" b="9456">tagging</wd>

<space/>

</ln>

<ln l="1656" t="9485" r="5798" b="9672" baseLine="9624">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1656" t="9485" r="1891" b="9629">for</wd>

<space/>

<wd l="1954" t="9485" r="2189" b="9629">all:</wd>

<space/>

<wd l="2290" t="9485" r="3278" b="9672">Overcoming</wd>

<space/>

<wd l="3350" t="9528" r="3835" b="9667">sparse</wd>

<space/>

<wd l="3902" t="9485" r="4186" b="9629">and</wd>

<space/>

<wd l="4248" t="9485" r="4675" b="9672">noisy</wd>

<space/>

<wd l="4742" t="9485" r="5107" b="9629">data.</wd>

<space/>

<wd l="5222" t="9490" r="5390" b="9624">In</wd>

<space/>

</run>

<wd l="5448" t="9490" r="5798" b="9629" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Pro-</wd>

</ln>

<ln l="1661" t="9706" r="5813" b="9888" baseLine="9840" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1661" t="9706" r="2357" b="9888">ceedings</wd>

<space/>

<wd l="2419" t="9706" r="2597" b="9888">of</wd>

<space/>

<wd l="2630" t="9706" r="2866" b="9850">the</wd>

<space/>

<wd l="2923" t="9706" r="3979" b="9850">International</wd>

<space/>

<wd l="4046" t="9706" r="4944" b="9888">Conference</wd>

<space/>

<wd l="5011" t="9754" r="5203" b="9850">on</wd>

<space/>

<wd l="5261" t="9710" r="5813" b="9850">Recent</wd>

<space/>

</ln>

<ln l="1642" t="9926" r="5798" b="10109" baseLine="10061">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1642" t="9926" r="2419" b="10070">Advances</wd>

<space/>

<wd l="2486" t="9931" r="2630" b="10070">in</wd>

<space/>

<wd l="2683" t="9926" r="3307" b="10070">Natural</wd>

<space/>

<wd l="3360" t="9931" r="4157" b="10109">Language</wd>

<space/>

</run>

<wd l="4214" t="9931" r="5126" b="10109"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Processing</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="5198" t="9926" r="5798" b="10070">Associ-</wd>

</run>

</ln>

<ln l="1661" t="10142" r="4546" b="10330" baseLine="10282" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1661" t="10142" r="2059" b="10286">ation</wd>

<space/>

<wd l="2107" t="10142" r="2338" b="10286">for</wd>

<space/>

<wd l="2395" t="10142" r="3571" b="10325">Computational</wd>

<space/>

<wd l="3624" t="10142" r="4546" b="10330">Linguistics.</wd>

</ln>

</para>

<para l="1445" t="10560" r="5803" b="12720" alignment="justified" li="216" spaceBefore="200" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="1445" t="10560" r="5798" b="10742" baseLine="10699" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="10560" r="1920" b="10704">Kevin</wd>

<space/>

<wd l="1982" t="10560" r="2616" b="10742">Gimpel,</wd>

<space/>

<wd l="2688" t="10560" r="3259" b="10704">Nathan</wd>

<space/>

<wd l="3326" t="10560" r="4152" b="10728">Schneider,</wd>

<space/>

<wd l="4219" t="10560" r="4891" b="10704">Brendan</wd>

<space/>

<wd l="4958" t="10560" r="5798" b="10728">O’Connor,</wd>

<space/>

</ln>

<ln l="1656" t="10781" r="5798" b="10968" baseLine="10915" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1656" t="10781" r="2386" b="10968">Dipanjan</wd>

<space/>

<wd l="2525" t="10786" r="2880" b="10949">Das,</wd>

<space/>

<wd l="3048" t="10781" r="3576" b="10925">Daniel</wd>

<space/>

<wd l="3715" t="10781" r="4181" b="10949">Mills,</wd>

<space/>

<wd l="4344" t="10781" r="4795" b="10925">Jacob</wd>

<space/>

<wd l="4934" t="10781" r="5798" b="10949">Eisenstein,</wd>

<space/>

</ln>

<ln l="1656" t="11002" r="5798" b="11189" baseLine="11136" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1656" t="11002" r="2309" b="11146">Michael</wd>

<space/>

<wd l="2357" t="11002" r="3086" b="11170">Heilman,</wd>

<space/>

<wd l="3139" t="11002" r="3523" b="11146">Dani</wd>

<space/>

<wd l="3576" t="11006" r="4416" b="11189">Yogatama,</wd>

<space/>

<wd l="4469" t="11002" r="5011" b="11189">Jeffrey</wd>

<space/>

<wd l="5059" t="11002" r="5798" b="11189">Flanigan,</wd>

<space/>

</ln>

<ln l="1661" t="11218" r="5803" b="11405" baseLine="11357" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1661" t="11218" r="1949" b="11362">and</wd>

<space/>

<wd l="2011" t="11218" r="2443" b="11362">Noah</wd>

<space/>

<wd l="2510" t="11222" r="2688" b="11362">A.</wd>

<space/>

<wd l="2770" t="11218" r="3283" b="11362">Smith.</wd>

<space/>

<wd l="3418" t="11218" r="3850" b="11362">2011.</wd>

<space/>

<wd l="3984" t="11218" r="5146" b="11400">Part-of-speech</wd>

<space/>

<wd l="5208" t="11218" r="5803" b="11405">tagging</wd>

<space/>

</ln>

<ln l="1656" t="11438" r="5794" b="11621" baseLine="11573" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1656" t="11438" r="1891" b="11582">for</wd>

<space/>

<wd l="1963" t="11438" r="2520" b="11582">twitter:</wd>

<space/>

<wd l="2645" t="11438" r="3586" b="11606">Annotation,</wd>

<space/>

<wd l="3667" t="11438" r="4339" b="11606">features,</wd>

<space/>

<wd l="4430" t="11438" r="4714" b="11582">and</wd>

<space/>

<wd l="4790" t="11438" r="5794" b="11621">experiments.</wd>

<space/>

</ln>

<ln l="1656" t="11659" r="5803" b="11842" baseLine="11794">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1656" t="11664" r="1824" b="11798">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1901" t="11659" r="2890" b="11842">Proceedings</wd>

<space/>

<wd l="2971" t="11659" r="3149" b="11842">of</wd>

<space/>

<wd l="3202" t="11659" r="3437" b="11803">the</wd>

<space/>

<wd l="3518" t="11659" r="3869" b="11803">49th</wd>

<space/>

<wd l="3931" t="11659" r="4526" b="11803">Annual</wd>

<space/>

<wd l="4594" t="11664" r="5251" b="11842">Meeting</wd>

<space/>

<wd l="5333" t="11659" r="5515" b="11842">of</wd>

<space/>

<wd l="5568" t="11659" r="5803" b="11803">the</wd>

<space/>

</run>

</ln>

<ln l="1642" t="11875" r="5803" b="12058" baseLine="12014" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1642" t="11880" r="2582" b="12019">Association</wd>

<space/>

<wd l="2616" t="11875" r="2890" b="12058">for</wd>

<space/>

<wd l="2952" t="11875" r="4138" b="12058">Computational</wd>

<space/>

<wd l="4190" t="11880" r="5126" b="12058">Linguistics:</wd>

<space/>

<wd l="5213" t="11880" r="5803" b="12019">Human</wd>

<space/>

</ln>

<ln l="1651" t="12096" r="5798" b="12278" baseLine="12230">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1651" t="12101" r="2448" b="12278">Language</wd>

<space/>

<wd l="2534" t="12096" r="3619" b="12278">Technologies:</wd>

<space/>

<wd l="3734" t="12096" r="4176" b="12240">Short</wd>

<space/>

<wd l="4238" t="12101" r="4786" b="12278">Papers</wd>

<space/>

<wd l="4867" t="12182" r="4915" b="12197">-</wd>

<space/>

<wd l="5011" t="12096" r="5582" b="12240">Volume</wd>

<space/>

</run>

<wd l="5654" t="12096" r="5798" b="12264"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">2</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

</ln>

<ln l="1656" t="12312" r="5798" b="12504" baseLine="12451" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1656" t="12322" r="2026" b="12456">HLT</wd>

<space/>

<wd l="2102" t="12317" r="2395" b="12485">’11,</wd>

<space/>

<wd l="2462" t="12360" r="2914" b="12504">pages</wd>

<space/>

<wd l="2976" t="12317" r="3518" b="12485">42–47,</wd>

<space/>

<wd l="3590" t="12317" r="4598" b="12504">Stroudsburg,</wd>

<space/>

<wd l="4666" t="12322" r="4944" b="12485">PA,</wd>

<space/>

<wd l="5011" t="12317" r="5448" b="12461">USA.</wd>

<space/>

<wd l="5520" t="12322" r="5798" b="12461">As-</wd>

</ln>

<ln l="1666" t="12533" r="4867" b="12720" baseLine="12672" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1666" t="12533" r="2381" b="12677">sociation</wd>

<space/>

<wd l="2429" t="12533" r="2659" b="12677">for</wd>

<space/>

<wd l="2717" t="12533" r="3893" b="12715">Computational</wd>

<space/>

<wd l="3946" t="12533" r="4867" b="12720">Linguistics.</wd>

</ln>

</para>

<para l="1440" t="12950" r="5837" b="14453" alignment="justified" li="216" spaceBefore="197" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="1440" t="12950" r="5798" b="13138" baseLine="13085" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1440" t="12955" r="1670" b="13094">Bo</wd>

<space/>

<wd l="1728" t="12955" r="2054" b="13094">Han</wd>

<space/>

<wd l="2117" t="12950" r="2400" b="13094">and</wd>

<space/>

<wd l="2453" t="12950" r="3130" b="13138">Timothy</wd>

<space/>

<wd l="3187" t="12950" r="3898" b="13094">Baldwin.</wd>

<space/>

<wd l="3998" t="12950" r="4430" b="13094">2011.</wd>

<space/>

<wd l="4526" t="12950" r="5117" b="13094">Lexical</wd>

<space/>

<wd l="5174" t="12950" r="5798" b="13094">normal-</wd>

</ln>

<ln l="1656" t="13171" r="5798" b="13358" baseLine="13306" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="13171" r="2189" b="13315">isation</wd>

<space/>

<wd l="2261" t="13171" r="2434" b="13315">of</wd>

<space/>

<wd l="2501" t="13171" r="2894" b="13315">short</wd>

<space/>

<wd l="2957" t="13190" r="3254" b="13315">text</wd>

<space/>

<wd l="3322" t="13214" r="4114" b="13358">messages:</wd>

<space/>

<wd l="4229" t="13171" r="4694" b="13315">Makn</wd>

<space/>

<wd l="4771" t="13214" r="5098" b="13315">sens</wd>

<space/>

<wd l="5179" t="13214" r="5266" b="13315">a</wd>

<space/>

<wd l="5328" t="13171" r="5798" b="13315">#twit-</wd>

</ln>

<ln l="1656" t="13387" r="5837" b="13570" baseLine="13526">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1656" t="13406" r="1896" b="13531">ter.</wd>

<space/>

<wd l="2040" t="13392" r="2203" b="13526">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2270" t="13387" r="3259" b="13570">Proceedings</wd>

<space/>

<wd l="3331" t="13387" r="3514" b="13570">of</wd>

<space/>

<wd l="3557" t="13387" r="3792" b="13531">the</wd>

<space/>

<wd l="3864" t="13387" r="4214" b="13531">49th</wd>

<space/>

<wd l="4272" t="13387" r="4862" b="13531">Annual</wd>

<space/>

<wd l="4925" t="13392" r="5578" b="13570">Meeting</wd>

<space/>

<wd l="5654" t="13387" r="5837" b="13570">of</wd>

<space/>

</run>

</ln>

<ln l="1661" t="13608" r="5798" b="13790" baseLine="13742" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1661" t="13608" r="1901" b="13752">the</wd>

<space/>

<wd l="1944" t="13613" r="2885" b="13752">Association</wd>

<space/>

<wd l="2909" t="13608" r="3182" b="13790">for</wd>

<space/>

<wd l="3240" t="13608" r="4430" b="13790">Computational</wd>

<space/>

<wd l="4478" t="13613" r="5414" b="13790">Linguistics:</wd>

<space/>

<wd l="5491" t="13613" r="5798" b="13752">Hu-</wd>

</ln>

<ln l="1656" t="13829" r="5798" b="14011" baseLine="13963">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1656" t="13877" r="1997" b="13973">man</wd>

<space/>

<wd l="2064" t="13834" r="2861" b="14011">Language</wd>

<space/>

<wd l="2942" t="13829" r="3970" b="14011">Technologies</wd>

<space/>

<wd l="4051" t="13915" r="4099" b="13930">-</wd>

<space/>

<wd l="4190" t="13829" r="4757" b="13973">Volume</wd>

<space/>

</run>

<wd l="4838" t="13829" r="4973" b="13997"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">1</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="5054" t="13834" r="5419" b="13968">HLT</wd>

<space/>

<wd l="5506" t="13829" r="5798" b="13997">’11,</wd>

<space/>

</run>

</ln>

<ln l="1656" t="14045" r="5808" b="14232" baseLine="14184" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="14088" r="2107" b="14232">pages</wd>

<space/>

<wd l="2170" t="14045" r="2904" b="14213">368–378,</wd>

<space/>

<wd l="2966" t="14045" r="3970" b="14232">Stroudsburg,</wd>

<space/>

<wd l="4027" t="14050" r="4310" b="14213">PA,</wd>

<space/>

<wd l="4363" t="14045" r="4800" b="14189">USA.</wd>

<space/>

<wd l="4862" t="14045" r="5808" b="14189">Association</wd>

<space/>

</ln>

<ln l="1656" t="14266" r="4099" b="14453" baseLine="14400" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="14266" r="1891" b="14410">for</wd>

<space/>

<wd l="1944" t="14266" r="3120" b="14448">Computational</wd>

<space/>

<wd l="3173" t="14266" r="4099" b="14453">Linguistics.</wd>

</ln>

</para>

<para l="1440" t="14683" r="5813" b="15307" alignment="justified" li="216" spaceBefore="203" fli="-216" lsp="exactly" lspExact="217" language="en">

<ln l="1440" t="14683" r="5798" b="14870" baseLine="14818" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1440" t="14688" r="1670" b="14827">Bo</wd>

<space/>

<wd l="1723" t="14688" r="2098" b="14851">Han,</wd>

<space/>

<wd l="2160" t="14683" r="2506" b="14827">Paul</wd>

<space/>

<wd l="2568" t="14683" r="3038" b="14851">Cook,</wd>

<space/>

<wd l="3101" t="14683" r="3384" b="14827">and</wd>

<space/>

<wd l="3437" t="14683" r="4109" b="14870">Timothy</wd>

<space/>

<wd l="4166" t="14683" r="4882" b="14827">Baldwin.</wd>

<space/>

<wd l="4973" t="14683" r="5405" b="14827">2012.</wd>

<space/>

<wd l="5496" t="14688" r="5798" b="14827">Au-</wd>

</ln>

<ln l="1656" t="14899" r="5803" b="15086" baseLine="15038" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="14899" r="2554" b="15086">tomatically</wd>

<space/>

<wd l="2626" t="14899" r="3605" b="15086">constructing</wd>

<space/>

<wd l="3682" t="14942" r="3768" b="15043">a</wd>

<space/>

<wd l="3835" t="14899" r="4930" b="15043">normalisation</wd>

<space/>

<wd l="5002" t="14899" r="5803" b="15086">dictionary</wd>

<space/>

</ln>

<ln l="1656" t="15120" r="5813" b="15307" baseLine="15259">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1656" t="15120" r="1891" b="15264">for</wd>

<space/>

<wd l="1968" t="15120" r="2904" b="15307">microblogs.</wd>

<space/>

<wd l="3077" t="15125" r="3240" b="15259">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3317" t="15120" r="4306" b="15302">Proceedings</wd>

<space/>

<wd l="4387" t="15120" r="4570" b="15302">of</wd>

<space/>

<wd l="4618" t="15120" r="4858" b="15264">the</wd>

<space/>

<wd l="4934" t="15120" r="5328" b="15264">2012</wd>

<space/>

<wd l="5405" t="15125" r="5813" b="15264">Joint</wd>

</run>

</ln>

</para>

</column>

<column l="6134" t="1260" r="10536" b="15318">

<para l="6365" t="1334" r="10512" b="2179" alignment="justified" li="216" spaceBefore="46" lsp="exactly" lspExact="219" language="en">

<ln l="6374" t="1334" r="10502" b="1517" baseLine="1474" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6374" t="1334" r="7272" b="1517">Conference</wd>

<space/>

<wd l="7349" t="1382" r="7536" b="1478">on</wd>

<space/>

<wd l="7603" t="1334" r="8410" b="1517">Empirical</wd>

<space/>

<wd l="8467" t="1334" r="9158" b="1478">Methods</wd>

<space/>

<wd l="9235" t="1339" r="9379" b="1478">in</wd>

<space/>

<wd l="9446" t="1334" r="10070" b="1478">Natural</wd>

<space/>

<wd l="10128" t="1339" r="10502" b="1478">Lan-</wd>

</ln>

<ln l="6365" t="1555" r="10502" b="1738" baseLine="1694" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6365" t="1603" r="6845" b="1738">guage</wd>

<space/>

<wd l="6907" t="1560" r="7781" b="1738">Processing</wd>

<space/>

<wd l="7843" t="1555" r="8146" b="1699">and</wd>

<space/>

<wd l="8213" t="1555" r="9398" b="1738">Computational</wd>

<space/>

<wd l="9451" t="1555" r="10075" b="1699">Natural</wd>

<space/>

<wd l="10128" t="1560" r="10502" b="1699">Lan-</wd>

</ln>

<ln l="6365" t="1776" r="10512" b="1963" baseLine="1910">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6365" t="1824" r="6845" b="1958">guage</wd>

<space/>

<wd l="6878" t="1781" r="7613" b="1958">Learning</wd>

<space/>

<wd l="7661" t="1781" r="9029" b="1954">(EMNLP-CoNLL</wd>

<space/>

</run>

<wd l="9067" t="1776" r="9571" b="1954"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">2012)</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="9619" t="1819" r="10070" b="1963">pages</wd>

<space/>

<wd l="10114" t="1776" r="10512" b="1915">421–</wd>

<space/>

</run>

</ln>

<ln l="6365" t="1992" r="8237" b="2179" baseLine="2131" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6365" t="1992" r="6706" b="2160">432,</wd>

<space/>

<wd l="6763" t="1992" r="7080" b="2179">Jeju</wd>

<space/>

<wd l="7133" t="1992" r="7666" b="2160">Island,</wd>

<space/>

<wd l="7723" t="1997" r="8237" b="2136">Korea.</wd>

</ln>

</para>

<para l="6144" t="2410" r="10502" b="3034" alignment="justified" li="216" spaceBefore="196" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="6144" t="2410" r="10502" b="2597" baseLine="2544" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6144" t="2414" r="6374" b="2554">Bo</wd>

<space/>

<wd l="6422" t="2414" r="6797" b="2578">Han,</wd>

<space/>

<wd l="6845" t="2410" r="7195" b="2554">Paul</wd>

<space/>

<wd l="7248" t="2410" r="7714" b="2578">Cook,</wd>

<space/>

<wd l="7771" t="2410" r="8054" b="2554">and</wd>

<space/>

<wd l="8098" t="2410" r="8774" b="2597">Timothy</wd>

<space/>

<wd l="8818" t="2410" r="9533" b="2554">Baldwin.</wd>

<space/>

<wd l="9619" t="2410" r="10051" b="2554">2013.</wd>

<space/>

<wd l="10138" t="2414" r="10502" b="2554">Lex-</wd>

</ln>

<ln l="6365" t="2626" r="10488" b="2770" baseLine="2765">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6365" t="2626" r="6648" b="2770">ical</wd>

<space/>

<wd l="6691" t="2626" r="7800" b="2770">normalization</wd>

<space/>

<wd l="7838" t="2626" r="8069" b="2770">for</wd>

<space/>

<wd l="8117" t="2626" r="8568" b="2770">social</wd>

<space/>

<wd l="8611" t="2626" r="9101" b="2770">media</wd>

<space/>

<wd l="9139" t="2645" r="9470" b="2770">text.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="9542" t="2630" r="9984" b="2770">ACM</wd>

<space/>

<wd l="10018" t="2630" r="10488" b="2770">Trans.</wd>

<space/>

</run>

</ln>

<ln l="6360" t="2846" r="10013" b="3034" baseLine="2981">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6360" t="2846" r="6811" b="2990">Intell.</wd>

<space/>

<wd l="6883" t="2851" r="7234" b="3029">Syst.</wd>

<space/>

</run>

<wd l="7315" t="2846" r="8016" b="3014"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">Technol.</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="8074" t="2846" r="9216" b="3024">4(1):5:1–5:27,</wd>

<space/>

<wd l="9269" t="2846" r="10013" b="3034">February.</wd>

</run>

</ln>

</para>

<para l="6144" t="3259" r="10517" b="3878" alignment="justified" li="216" spaceBefore="197" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="6144" t="3259" r="10512" b="3446" baseLine="3398" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="3264" r="6538" b="3403">Amaç</wd>

<space/>

<wd l="6600" t="3259" r="7555" b="3403">Herdağdelen.</wd>

<space/>

<wd l="7704" t="3259" r="8136" b="3403">2013.</wd>

<space/>

<wd l="8275" t="3259" r="8846" b="3403">Twitter</wd>

<space/>

<wd l="8914" t="3302" r="9494" b="3446">n-gram</wd>

<space/>

<wd l="9562" t="3302" r="10085" b="3442">corpus</wd>

<space/>

<wd l="10157" t="3259" r="10512" b="3403">with</wd>

<space/>

</ln>

<ln l="6370" t="3480" r="10517" b="3667" baseLine="3614">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6370" t="3480" r="7402" b="3667">demographic</wd>

<space/>

<wd l="7488" t="3480" r="8246" b="3624">metadata.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="8434" t="3485" r="9230" b="3662">Language</wd>

<space/>

<wd l="9312" t="3485" r="10128" b="3624">Resources</wd>

<space/>

<wd l="10214" t="3480" r="10517" b="3624">and</wd>

<space/>

</run>

</ln>

<ln l="6360" t="3701" r="8760" b="3878" baseLine="3835">

<wd l="6360" t="3701" r="7282" b="3869"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Evaluation</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="7339" t="3701" r="8760" b="3878">47(4):1127–1147.</wd>

</run>

</ln>

</para>

<para l="6144" t="4114" r="10507" b="5616" alignment="justified" li="216" spaceBefore="194" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="6144" t="4114" r="10507" b="4301" baseLine="4248" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="4114" r="6523" b="4258">John</wd>

<space/>

<wd l="6610" t="4118" r="6792" b="4258">D.</wd>

<space/>

<wd l="6888" t="4114" r="7565" b="4301">Lafferty,</wd>

<space/>

<wd l="7666" t="4114" r="8299" b="4258">Andrew</wd>

<space/>

<wd l="8390" t="4114" r="9283" b="4282">McCallum,</wd>

<space/>

<wd l="9389" t="4114" r="9672" b="4258">and</wd>

<space/>

<wd l="9758" t="4114" r="10507" b="4258">Fernando</wd>

<space/>

</ln>

<ln l="6370" t="4334" r="10493" b="4478" baseLine="4469" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6370" t="4334" r="6533" b="4478">C.</wd>

<space/>

<wd l="6629" t="4339" r="6811" b="4478">N.</wd>

<space/>

<wd l="6902" t="4334" r="7507" b="4478">Pereira.</wd>

<space/>

<wd l="7694" t="4334" r="8126" b="4478">2001.</wd>

<space/>

<wd l="8314" t="4334" r="9250" b="4478">Conditional</wd>

<space/>

<wd l="9331" t="4334" r="9946" b="4478">random</wd>

<space/>

<wd l="10027" t="4334" r="10493" b="4478">fields:</wd>

<space/>

</ln>

<ln l="6365" t="4550" r="10502" b="4738" baseLine="4690" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6365" t="4550" r="7368" b="4694">Probabilistic</wd>

<space/>

<wd l="7411" t="4550" r="7982" b="4694">models</wd>

<space/>

<wd l="8030" t="4550" r="8266" b="4694">for</wd>

<space/>

<wd l="8314" t="4550" r="9216" b="4738">segmenting</wd>

<space/>

<wd l="9269" t="4550" r="9552" b="4694">and</wd>

<space/>

<wd l="9595" t="4550" r="10234" b="4738">labeling</wd>

<space/>

<wd l="10286" t="4594" r="10502" b="4694">se-</wd>

</ln>

<ln l="6370" t="4771" r="10502" b="4954" baseLine="4906">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6370" t="4814" r="6926" b="4954">quence</wd>

<space/>

<wd l="6979" t="4771" r="7344" b="4915">data.</wd>

<space/>

<wd l="7426" t="4776" r="7594" b="4910">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7637" t="4771" r="8621" b="4954">Proceedings</wd>

<space/>

<wd l="8674" t="4771" r="8851" b="4954">of</wd>

<space/>

<wd l="8870" t="4771" r="9106" b="4915">the</wd>

<space/>

<wd l="9149" t="4771" r="10013" b="4954">Eighteenth</wd>

<space/>

<wd l="10056" t="4776" r="10502" b="4915">Inter-</wd>

</run>

</ln>

<ln l="6365" t="4992" r="10507" b="5174" baseLine="5126">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6365" t="4992" r="7032" b="5136">national</wd>

<space/>

<wd l="7118" t="4992" r="8016" b="5174">Conference</wd>

<space/>

<wd l="8102" t="5040" r="8290" b="5136">on</wd>

<space/>

<wd l="8371" t="4992" r="9067" b="5136">Machine</wd>

<space/>

</run>

<wd l="9144" t="4997" r="9922" b="5174"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Learning</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="10013" t="4992" r="10507" b="5136">ICML</wd>

<space/>

</run>

</ln>

<ln l="6379" t="5208" r="10502" b="5395" baseLine="5347" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6379" t="5208" r="6672" b="5376">’01,</wd>

<space/>

<wd l="6725" t="5251" r="7176" b="5395">pages</wd>

<space/>

<wd l="7234" t="5208" r="7968" b="5376">282–289,</wd>

<space/>

<wd l="8026" t="5208" r="8314" b="5352">San</wd>

<space/>

<wd l="8366" t="5208" r="9182" b="5376">Francisco,</wd>

<space/>

<wd l="9245" t="5208" r="9557" b="5376">CA,</wd>

<space/>

<wd l="9610" t="5208" r="10046" b="5352">USA.</wd>

<space/>

<wd l="10104" t="5213" r="10502" b="5352">Mor-</wd>

</ln>

<ln l="6370" t="5429" r="8765" b="5616" baseLine="5563" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6370" t="5472" r="6648" b="5616">gan</wd>

<space/>

<wd l="6706" t="5429" r="7541" b="5573">Kaufmann</wd>

<space/>

<wd l="7589" t="5429" r="8414" b="5573">Publishers</wd>

<space/>

<wd l="8472" t="5434" r="8765" b="5573">Inc.</wd>

</ln>

</para>

<para l="6144" t="5842" r="10512" b="6422" alignment="justified" li="216" spaceBefore="199" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="6144" t="5842" r="10498" b="6010" baseLine="5976" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="5842" r="6528" b="5986">Alex</wd>

<space/>

<wd l="6586" t="5842" r="7094" b="6010">Lamb,</wd>

<space/>

<wd l="7162" t="5842" r="7810" b="5986">Michael</wd>

<space/>

<wd l="7867" t="5846" r="7982" b="5986">J.</wd>

<space/>

<wd l="8054" t="5842" r="8448" b="6010">Paul,</wd>

<space/>

<wd l="8515" t="5842" r="8798" b="5986">and</wd>

<space/>

<wd l="8856" t="5842" r="9288" b="5986">Mark</wd>

<space/>

<wd l="9346" t="5842" r="9960" b="5986">Dredze.</wd>

<space/>

<wd l="10066" t="5842" r="10498" b="5986">2013.</wd>

<space/>

</ln>

<ln l="6370" t="6062" r="10512" b="6250" baseLine="6197" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6370" t="6062" r="7214" b="6250">Separating</wd>

<space/>

<wd l="7262" t="6062" r="7560" b="6206">fact</wd>

<space/>

<wd l="7603" t="6062" r="7997" b="6206">from</wd>

<space/>

<wd l="8040" t="6062" r="8386" b="6206">fear:</wd>

<space/>

<wd l="8462" t="6062" r="9173" b="6250">Tracking</wd>

<space/>

<wd l="9226" t="6062" r="9427" b="6206">flu</wd>

<space/>

<wd l="9480" t="6062" r="10258" b="6206">infections</wd>

<space/>

<wd l="10315" t="6106" r="10512" b="6206">on</wd>

<space/>

</ln>

<ln l="6365" t="6278" r="8074" b="6422" baseLine="6418">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6365" t="6278" r="6912" b="6422">twitter.</wd>

<space/>

<wd l="6994" t="6283" r="7162" b="6418">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7205" t="6283" r="7373" b="6422">In</wd>

<space/>

</run>

<wd l="7421" t="6283" r="8074" b="6422"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">NAACL</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

<para l="6144" t="6696" r="10512" b="8414" alignment="justified" li="216" spaceBefore="193" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="6144" t="6696" r="10498" b="6883" baseLine="6830" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="6696" r="6398" b="6840">Fei</wd>

<space/>

<wd l="6446" t="6696" r="6768" b="6864">Liu,</wd>

<space/>

<wd l="6826" t="6696" r="7430" b="6883">Fuliang</wd>

<space/>

<wd l="7483" t="6701" r="7982" b="6883">Weng,</wd>

<space/>

<wd l="8040" t="6696" r="8779" b="6883">Bingqing</wd>

<space/>

<wd l="8832" t="6701" r="9331" b="6883">Wang,</wd>

<space/>

<wd l="9394" t="6696" r="9677" b="6840">and</wd>

<space/>

<wd l="9725" t="6701" r="10133" b="6883">Yang</wd>

<space/>

<wd l="10186" t="6696" r="10498" b="6840">Liu.</wd>

<space/>

</ln>

<ln l="6370" t="6912" r="10502" b="7080" baseLine="7051" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6370" t="6912" r="6802" b="7056">2011.</wd>

<space/>

<wd l="6994" t="6912" r="7742" b="7080">Insertion,</wd>

<space/>

<wd l="7843" t="6912" r="8573" b="7080">Deletion,</wd>

<space/>

<wd l="8678" t="6955" r="8842" b="7056">or</wd>

<space/>

<wd l="8928" t="6912" r="9974" b="7056">Substitution?</wd>

<space/>

<wd l="10138" t="6917" r="10502" b="7056">Nor-</wd>

</ln>

<ln l="6365" t="7133" r="10512" b="7320" baseLine="7267" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6365" t="7133" r="7056" b="7320">malizing</wd>

<space/>

<wd l="7128" t="7138" r="7478" b="7277">Text</wd>

<space/>

<wd l="7541" t="7138" r="8309" b="7320">Messages</wd>

<space/>

<wd l="8381" t="7133" r="8990" b="7277">without</wd>

<space/>

<wd l="9053" t="7133" r="10512" b="7320">Pre-categorization</wd>

<space/>

</ln>

<ln l="6365" t="7325" r="10512" b="7536" baseLine="7483">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6365" t="7397" r="6629" b="7498">nor</wd>

<space/>

<wd l="6691" t="7354" r="7675" b="7536">Supervision.</wd>

<space/>

<wd l="7781" t="7358" r="7949" b="7493">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="8006" t="7354" r="8990" b="7536">Proceedings</wd>

<space/>

<wd l="9053" t="7354" r="9235" b="7536">of</wd>

<space/>

<wd l="9264" t="7354" r="9504" b="7498">the</wd>

<space/>

</run>

<wd l="9562" t="7325" r="9864" b="7498"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">49</run>

<run italic="true" underlined="none" subsuperscript="superscript" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">th</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="9922" t="7354" r="10512" b="7498">Annual</wd>

<space/>

</run>

</ln>

<ln l="6360" t="7570" r="10502" b="7752" baseLine="7709" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6360" t="7574" r="7013" b="7752">Meeting</wd>

<space/>

<wd l="7090" t="7570" r="7272" b="7752">of</wd>

<space/>

<wd l="7310" t="7570" r="7550" b="7714">the</wd>

<space/>

<wd l="7603" t="7574" r="8544" b="7714">Association</wd>

<space/>

<wd l="8582" t="7570" r="8856" b="7752">for</wd>

<space/>

<wd l="8923" t="7570" r="10114" b="7752">Computational</wd>

<space/>

<wd l="10176" t="7574" r="10502" b="7714">Lin-</wd>

</ln>

<ln l="6365" t="7790" r="10512" b="7978" baseLine="7925">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6365" t="7795" r="7032" b="7973">guistics:</wd>

<space/>

<wd l="7094" t="7795" r="7685" b="7934">Human</wd>

<space/>

<wd l="7728" t="7795" r="8525" b="7973">Language</wd>

<space/>

</run>

<wd l="8582" t="7790" r="9658" b="7973"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Technologies</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="9710" t="7834" r="10157" b="7978">pages</wd>

<space/>

<wd l="10210" t="7790" r="10512" b="7934">71–</wd>

<space/>

</run>

</ln>

<ln l="6365" t="8011" r="10502" b="8198" baseLine="8146" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6365" t="8011" r="6605" b="8179">76,</wd>

<space/>

<wd l="6672" t="8011" r="7675" b="8198">Stroudsburg,</wd>

<space/>

<wd l="7738" t="8016" r="8016" b="8179">PA,</wd>

<space/>

<wd l="8078" t="8011" r="8515" b="8155">USA.</wd>

<space/>

<wd l="8578" t="8011" r="9518" b="8155">Association</wd>

<space/>

<wd l="9571" t="8011" r="9806" b="8155">for</wd>

<space/>

<wd l="9864" t="8011" r="10502" b="8194">Compu-</wd>

</ln>

<ln l="6365" t="8227" r="7934" b="8414" baseLine="8366" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6365" t="8227" r="6960" b="8371">tational</wd>

<space/>

<wd l="7013" t="8227" r="7934" b="8414">Linguistics.</wd>

</ln>

</para>

<para l="6144" t="8645" r="10512" b="9706" alignment="justified" li="216" spaceBefore="200" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="6144" t="8645" r="10512" b="8832" baseLine="8779" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6144" t="8645" r="6398" b="8789">Fei</wd>

<space/>

<wd l="6490" t="8645" r="6806" b="8813">Liu,</wd>

<space/>

<wd l="6912" t="8645" r="7517" b="8832">Fuliang</wd>

<space/>

<wd l="7608" t="8650" r="8112" b="8832">Weng,</wd>

<space/>

<wd l="8218" t="8645" r="8501" b="8789">and</wd>

<space/>

<wd l="8587" t="8645" r="8971" b="8789">Xiao</wd>

<space/>

<wd l="9062" t="8645" r="9523" b="8832">Jiang.</wd>

<space/>

<wd l="9730" t="8645" r="10162" b="8789">2012.</wd>

<space/>

<wd l="10368" t="8650" r="10512" b="8784">A</wd>

<space/>

</ln>

<ln l="6365" t="8861" r="10507" b="9048" baseLine="9000" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6365" t="8861" r="7670" b="9048">Broad-Coverage</wd>

<space/>

<wd l="7766" t="8861" r="8918" b="9005">Normalization</wd>

<space/>

<wd l="9014" t="8861" r="9600" b="9048">System</wd>

<space/>

<wd l="9686" t="8861" r="9922" b="9005">for</wd>

<space/>

<wd l="10018" t="8861" r="10507" b="9005">Social</wd>

<space/>

</ln>

<ln l="6365" t="9082" r="10512" b="9269" baseLine="9216">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="6365" t="9082" r="6874" b="9226">Media</wd>

<space/>

<wd l="6941" t="9086" r="7766" b="9269">Language.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="7915" t="9082" r="8899" b="9264">Proceedings</wd>

<space/>

<wd l="8976" t="9082" r="9158" b="9264">of</wd>

<space/>

<wd l="9202" t="9082" r="9437" b="9226">the</wd>

<space/>

<wd l="9509" t="9082" r="9859" b="9226">50th</wd>

<space/>

<wd l="9922" t="9082" r="10512" b="9226">Annual</wd>

<space/>

</run>

</ln>

<ln l="6360" t="9302" r="10502" b="9485" baseLine="9437" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6360" t="9307" r="7013" b="9485">Meeting</wd>

<space/>

<wd l="7090" t="9302" r="7272" b="9485">of</wd>

<space/>

<wd l="7310" t="9302" r="7550" b="9446">the</wd>

<space/>

<wd l="7603" t="9307" r="8544" b="9446">Association</wd>

<space/>

<wd l="8582" t="9302" r="8856" b="9485">for</wd>

<space/>

<wd l="8923" t="9302" r="10114" b="9485">Computational</wd>

<space/>

<wd l="10176" t="9307" r="10502" b="9446">Lin-</wd>

</ln>

<ln l="6365" t="9518" r="10450" b="9706" baseLine="9658">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="6365" t="9523" r="7032" b="9701">guistics:</wd>

<space/>

<wd l="7094" t="9523" r="7507" b="9701">Long</wd>

<space/>

<wd l="7560" t="9518" r="8760" b="9701">Papers-Volume</wd>

<space/>

</run>

<wd l="8818" t="9518" r="8952" b="9686"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">1</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="9010" t="9562" r="9456" b="9706">pages</wd>

<space/>

<wd l="9533" t="9518" r="10450" b="9662">1035–1044.</wd>

</run>

</ln>

</para>

<para l="6144" t="9931" r="10502" b="10330" alignment="justified" li="216" spaceBefore="188" fli="-216" lsp="exactly" lspExact="221" language="en">

<ln l="6144" t="9931" r="10502" b="10114" baseLine="10070" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="9931" r="6720" b="10075">Naoaki</wd>

<space/>

<wd l="6792" t="9931" r="7488" b="10075">Okazaki.</wd>

<space/>

<wd l="7632" t="9931" r="8064" b="10075">2007.</wd>

<space/>

<wd l="8208" t="9931" r="8885" b="10075">Crfsuite:</wd>

<space/>

<wd l="9000" t="9974" r="9086" b="10075">a</wd>

<space/>

<wd l="9149" t="9931" r="9437" b="10075">fast</wd>

<space/>

<wd l="9504" t="9931" r="10502" b="10114">implementa-</wd>

</ln>

<ln l="6365" t="10152" r="9494" b="10330" baseLine="10291" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6365" t="10152" r="6677" b="10296">tion</wd>

<space/>

<wd l="6730" t="10152" r="6898" b="10296">of</wd>

<space/>

<wd l="6946" t="10152" r="7834" b="10296">conditional</wd>

<space/>

<wd l="7886" t="10152" r="8496" b="10296">random</wd>

<space/>

<wd l="8549" t="10152" r="8971" b="10296">fields</wd>

<space/>

<wd l="9034" t="10152" r="9494" b="10330">(crfs).</wd>

</ln>

</para>

<para l="6144" t="10565" r="10507" b="12067" alignment="justified" li="216" spaceBefore="195" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="6144" t="10565" r="10498" b="10733" baseLine="10704" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="10565" r="6528" b="10709">Alan</wd>

<space/>

<wd l="6614" t="10565" r="7104" b="10733">Ritter,</wd>

<space/>

<wd l="7205" t="10565" r="7555" b="10709">Sam</wd>

<space/>

<wd l="7637" t="10565" r="8117" b="10733">Clark,</wd>

<space/>

<wd l="8213" t="10570" r="8942" b="10733">Mausam,</wd>

<space/>

<wd l="9043" t="10565" r="9326" b="10709">and</wd>

<space/>

<wd l="9413" t="10565" r="9806" b="10709">Oren</wd>

<space/>

<wd l="9883" t="10565" r="10498" b="10709">Etzioni.</wd>

<space/>

</ln>

<ln l="6370" t="10786" r="10502" b="10973" baseLine="10920" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6370" t="10786" r="6802" b="10930">2011.</wd>

<space/>

<wd l="6936" t="10786" r="7512" b="10930">Named</wd>

<space/>

<wd l="7584" t="10786" r="8026" b="10973">entity</wd>

<space/>

<wd l="8098" t="10786" r="9005" b="10973">recognition</wd>

<space/>

<wd l="9072" t="10786" r="9226" b="10925">in</wd>

<space/>

<wd l="9293" t="10805" r="9840" b="10930">tweets:</wd>

<space/>

<wd l="9950" t="10790" r="10195" b="10925">An</wd>

<space/>

<wd l="10262" t="10829" r="10502" b="10930">ex-</wd>

</ln>

<ln l="6365" t="11006" r="10507" b="11194" baseLine="11141">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6365" t="11006" r="7214" b="11189">perimental</wd>

<space/>

<wd l="7282" t="11006" r="7728" b="11194">study.</wd>

<space/>

<wd l="7829" t="11011" r="7997" b="11146">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="8050" t="11006" r="9034" b="11189">Proceedings</wd>

<space/>

<wd l="9096" t="11006" r="9278" b="11189">of</wd>

<space/>

<wd l="9307" t="11006" r="9542" b="11150">the</wd>

<space/>

<wd l="9610" t="11006" r="10507" b="11189">Conference</wd>

<space/>

</run>

</ln>

<ln l="6370" t="11222" r="10502" b="11405" baseLine="11362" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6370" t="11270" r="6562" b="11366">on</wd>

<space/>

<wd l="6648" t="11222" r="7454" b="11405">Empirical</wd>

<space/>

<wd l="7536" t="11222" r="8227" b="11366">Methods</wd>

<space/>

<wd l="8328" t="11227" r="8467" b="11366">in</wd>

<space/>

<wd l="8558" t="11222" r="9182" b="11366">Natural</wd>

<space/>

<wd l="9264" t="11227" r="10061" b="11405">Language</wd>

<space/>

<wd l="10157" t="11227" r="10502" b="11366">Pro-</wd>

</ln>

<ln l="6370" t="11443" r="10502" b="11630" baseLine="11578">

<wd l="6370" t="11448" r="6994" b="11626"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">cessing</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="7094" t="11448" r="7766" b="11587">EMNLP</wd>

<space/>

<wd l="7867" t="11443" r="8160" b="11611">’11,</wd>

<space/>

<wd l="8261" t="11486" r="8707" b="11630">pages</wd>

<space/>

<wd l="8818" t="11443" r="9734" b="11611">1524–1534,</wd>

<space/>

<wd l="9840" t="11443" r="10502" b="11587">Strouds-</wd>

</run>

</ln>

<ln l="6365" t="11664" r="10502" b="11851" baseLine="11798" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6365" t="11664" r="6763" b="11851">burg,</wd>

<space/>

<wd l="6821" t="11669" r="7104" b="11832">PA,</wd>

<space/>

<wd l="7157" t="11664" r="7594" b="11808">USA.</wd>

<space/>

<wd l="7656" t="11664" r="8602" b="11808">Association</wd>

<space/>

<wd l="8650" t="11664" r="8885" b="11808">for</wd>

<space/>

<wd l="8938" t="11664" r="10114" b="11846">Computational</wd>

<space/>

<wd l="10166" t="11664" r="10502" b="11803">Lin-</wd>

</ln>

<ln l="6370" t="11880" r="7013" b="12067" baseLine="12019" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6370" t="11880" r="7013" b="12067">guistics.</wd>

</ln>

</para>

<para l="6144" t="12298" r="10517" b="13560" alignment="justified" li="216" spaceBefore="193" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="6144" t="12298" r="10498" b="12466" baseLine="12432" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="12298" r="6528" b="12442">Alan</wd>

<space/>

<wd l="6614" t="12298" r="7104" b="12466">Ritter,</wd>

<space/>

<wd l="7200" t="12302" r="7930" b="12466">Mausam,</wd>

<space/>

<wd l="8026" t="12298" r="8419" b="12442">Oren</wd>

<space/>

<wd l="8501" t="12298" r="9120" b="12466">Etzioni,</wd>

<space/>

<wd l="9221" t="12298" r="9504" b="12442">and</wd>

<space/>

<wd l="9590" t="12298" r="9941" b="12442">Sam</wd>

<space/>

<wd l="10022" t="12298" r="10498" b="12442">Clark.</wd>

<space/>

</ln>

<ln l="6370" t="12514" r="10498" b="12696" baseLine="12653" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6370" t="12514" r="6802" b="12658">2012.</wd>

<space/>

<wd l="6960" t="12514" r="7387" b="12696">Open</wd>

<space/>

<wd l="7464" t="12514" r="8059" b="12658">domain</wd>

<space/>

<wd l="8136" t="12533" r="8554" b="12658">event</wd>

<space/>

<wd l="8630" t="12514" r="9422" b="12658">extraction</wd>

<space/>

<wd l="9494" t="12514" r="9883" b="12658">from</wd>

<space/>

<wd l="9950" t="12514" r="10498" b="12658">twitter.</wd>

<space/>

</ln>

<ln l="6365" t="12734" r="10502" b="12917" baseLine="12869">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6365" t="12739" r="6533" b="12874">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6624" t="12734" r="7608" b="12917">Proceedings</wd>

<space/>

<wd l="7709" t="12734" r="7886" b="12917">of</wd>

<space/>

<wd l="7954" t="12734" r="8189" b="12878">the</wd>

<space/>

<wd l="8294" t="12734" r="8635" b="12878">18th</wd>

<space/>

<wd l="8717" t="12739" r="9158" b="12878">ACM</wd>

<space/>

<wd l="9240" t="12739" r="9970" b="12878">SIGKDD</wd>

<space/>

<wd l="10056" t="12739" r="10502" b="12878">Inter-</wd>

</run>

</ln>

<ln l="6365" t="12955" r="10517" b="13138" baseLine="13090" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6365" t="12955" r="7032" b="13099">national</wd>

<space/>

<wd l="7114" t="12955" r="8011" b="13138">Conference</wd>

<space/>

<wd l="8093" t="13003" r="8285" b="13099">on</wd>

<space/>

<wd l="8362" t="12955" r="9254" b="13138">Knowledge</wd>

<space/>

<wd l="9326" t="12960" r="10138" b="13138">Discovery</wd>

<space/>

<wd l="10214" t="12955" r="10517" b="13099">and</wd>

<space/>

</ln>

<ln l="6360" t="13171" r="10507" b="13358" baseLine="13310">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6360" t="13176" r="6758" b="13315">Data</wd>

<space/>

</run>

<wd l="6854" t="13176" r="7478" b="13354"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Mining</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="7594" t="13176" r="8016" b="13310">KDD</wd>

<space/>

<wd l="8131" t="13171" r="8424" b="13339">’12,</wd>

<space/>

<wd l="8539" t="13214" r="8986" b="13358">pages</wd>

<space/>

<wd l="9106" t="13171" r="10027" b="13339">1104–1112,</wd>

<space/>

<wd l="10138" t="13176" r="10507" b="13315">New</wd>

<space/>

</run>

</ln>

<ln l="6365" t="13392" r="8194" b="13560" baseLine="13526" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6365" t="13392" r="6797" b="13560">York,</wd>

<space/>

<wd l="6850" t="13397" r="7157" b="13560">NY,</wd>

<space/>

<wd l="7214" t="13392" r="7651" b="13536">USA.</wd>

<space/>

<wd l="7709" t="13392" r="8194" b="13536">ACM.</wd>

</ln>

</para>

<para l="6144" t="13805" r="10522" b="15307" alignment="justified" li="216" spaceBefore="200" fli="-216" lsp="exactly" lspExact="218" language="en">

<ln l="6144" t="13805" r="10512" b="13992" baseLine="13944" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6144" t="13805" r="6528" b="13949">Alan</wd>

<space/>

<wd l="6619" t="13805" r="7109" b="13973">Ritter,</wd>

<space/>

<wd l="7214" t="13810" r="7622" b="13949">Evan</wd>

<space/>

<wd l="7709" t="13805" r="8318" b="13992">Wright,</wd>

<space/>

<wd l="8419" t="13805" r="9072" b="13949">William</wd>

<space/>

<wd l="9158" t="13805" r="9667" b="13992">Casey,</wd>

<space/>

<wd l="9778" t="13805" r="10061" b="13949">and</wd>

<space/>

<wd l="10152" t="13810" r="10512" b="13949">Tom</wd>

<space/>

</ln>

<ln l="6365" t="14026" r="10522" b="14213" baseLine="14160" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6365" t="14026" r="7075" b="14170">Mitchell.</wd>

<space/>

<wd l="7258" t="14026" r="7690" b="14170">2015.</wd>

<space/>

<wd l="7862" t="14026" r="8462" b="14213">Weakly</wd>

<space/>

<wd l="8554" t="14026" r="9398" b="14208">supervised</wd>

<space/>

<wd l="9480" t="14026" r="10267" b="14170">extraction</wd>

<space/>

<wd l="10349" t="14026" r="10522" b="14170">of</wd>

<space/>

</ln>

<ln l="6370" t="14242" r="10502" b="14429" baseLine="14381">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6370" t="14261" r="7118" b="14424">computer</wd>

<space/>

<wd l="7190" t="14242" r="7805" b="14429">security</wd>

<space/>

<wd l="7877" t="14261" r="8366" b="14386">events</wd>

<space/>

<wd l="8438" t="14242" r="8827" b="14386">from</wd>

<space/>

<wd l="8890" t="14242" r="9437" b="14386">twitter.</wd>

<space/>

<wd l="9562" t="14246" r="9730" b="14381">In</wd>

<space/>

</run>

<wd l="9792" t="14242" r="10502" b="14386" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">Proceed-</wd>

</ln>

<ln l="6374" t="14462" r="10517" b="14645" baseLine="14602" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6374" t="14467" r="6696" b="14645">ings</wd>

<space/>

<wd l="6758" t="14462" r="6941" b="14645">of</wd>

<space/>

<wd l="6974" t="14462" r="7214" b="14606">the</wd>

<space/>

<wd l="7277" t="14462" r="7627" b="14606">24th</wd>

<space/>

<wd l="7685" t="14462" r="8741" b="14606">International</wd>

<space/>

<wd l="8808" t="14462" r="9710" b="14645">Conference</wd>

<space/>

<wd l="9778" t="14510" r="9970" b="14606">on</wd>

<space/>

<wd l="10042" t="14462" r="10517" b="14606">World</wd>

<space/>

</ln>

<ln l="6374" t="14683" r="10512" b="14870" baseLine="14818">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6374" t="14683" r="6758" b="14827">Wide</wd>

<space/>

</run>

<wd l="6811" t="14683" r="7181" b="14851"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">Web</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="7229" t="14688" r="7790" b="14827">WWW</wd>

<space/>

<wd l="7843" t="14683" r="8136" b="14851">’15,</wd>

<space/>

<wd l="8184" t="14726" r="8635" b="14870">pages</wd>

<space/>

<wd l="8688" t="14683" r="9418" b="14851">896–905,</wd>

<space/>

<wd l="9466" t="14683" r="10181" b="14866">Republic</wd>

<space/>

<wd l="10229" t="14683" r="10512" b="14827">and</wd>

<space/>

</run>

</ln>

<ln l="6370" t="14899" r="10512" b="15067" baseLine="15038" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6370" t="14899" r="6941" b="15043">Canton</wd>

<space/>

<wd l="6994" t="14899" r="7166" b="15043">of</wd>

<space/>

<wd l="7210" t="14899" r="7843" b="15067">Geneva,</wd>

<space/>

<wd l="7906" t="14899" r="8890" b="15043">Switzerland.</wd>

<space/>

<wd l="8952" t="14899" r="9965" b="15043">International</wd>

<space/>

<wd l="10018" t="14899" r="10512" b="15043">World</wd>

<space/>

</ln>

<ln l="6365" t="15120" r="9931" b="15307" baseLine="15259" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6365" t="15120" r="6787" b="15264">Wide</wd>

<space/>

<wd l="6835" t="15120" r="7195" b="15264">Web</wd>

<space/>

<wd l="7253" t="15120" r="8237" b="15264">Conferences</wd>

<space/>

<wd l="8299" t="15120" r="8952" b="15307">Steering</wd>

<space/>

<wd l="9010" t="15120" r="9931" b="15264">Committee.</wd>

</ln>

</para>

</column>

</section>

<dd l="5738" t="15746" r="6233" b="15975">

<para l="5771" t="15792" r="6200" b="15941" alignment="left" lsp="exactly" lspExact="223" language="en">

<ln l="5837" t="15792" r="6134" b="15941" baseLine="15936" underlined="none" subsuperscript="none" fontSize="950" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="15">

<wd l="5837" t="15792" r="6134" b="15941">124</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4318.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1440" marginTop="1280" marginRight="6089" marginBottom="858" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1440" t="1280" r="5820" b="15980">

<column l="1440" t="1280" r="5820" b="15980">

<para l="1440" t="1334" r="5803" b="2578" alignment="justified" li="216" spaceBefore="25" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="1440" t="1334" r="5794" b="1502" baseLine="1474" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1440" t="1334" r="2050" b="1478">Takeshi</wd>

<space/>

<wd l="2112" t="1334" r="2693" b="1502">Sakaki,</wd>

<space/>

<wd l="2760" t="1334" r="3374" b="1478">Makoto</wd>

<space/>

<wd l="3437" t="1334" r="4142" b="1502">Okazaki,</wd>

<space/>

<wd l="4210" t="1334" r="4493" b="1478">and</wd>

<space/>

<wd l="4550" t="1334" r="5102" b="1478">Yutaka</wd>

<space/>

<wd l="5160" t="1339" r="5794" b="1478">Matsuo.</wd>

<space/>

</ln>

<ln l="1661" t="1555" r="5803" b="1738" baseLine="1694" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1661" t="1555" r="2093" b="1699">2010.</wd>

<space/>

<wd l="2261" t="1555" r="3163" b="1738">Earthquake</wd>

<space/>

<wd l="3250" t="1555" r="3763" b="1699">shakes</wd>

<space/>

<wd l="3845" t="1555" r="4368" b="1699">twitter</wd>

<space/>

<wd l="4440" t="1598" r="4891" b="1699">users:</wd>

<space/>

<wd l="5021" t="1555" r="5803" b="1699">Real-time</wd>

<space/>

</ln>

<ln l="1661" t="1776" r="5803" b="1963" baseLine="1910">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1661" t="1795" r="2083" b="1920">event</wd>

<space/>

<wd l="2165" t="1776" r="2890" b="1920">detection</wd>

<space/>

<wd l="2966" t="1776" r="3163" b="1963">by</wd>

<space/>

<wd l="3250" t="1776" r="3706" b="1920">social</wd>

<space/>

<wd l="3792" t="1819" r="4406" b="1920">sensors.</wd>

<space/>

<wd l="4574" t="1781" r="4742" b="1915">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="4819" t="1776" r="5803" b="1958">Proceedings</wd>

<space/>

</run>

</ln>

<ln l="1661" t="1992" r="5803" b="2174" baseLine="2131" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1661" t="1992" r="1843" b="2174">of</wd>

<space/>

<wd l="1867" t="1992" r="2107" b="2136">the</wd>

<space/>

<wd l="2170" t="1992" r="2510" b="2136">19th</wd>

<space/>

<wd l="2558" t="1992" r="3614" b="2136">International</wd>

<space/>

<wd l="3677" t="1992" r="4574" b="2174">Conference</wd>

<space/>

<wd l="4632" t="2040" r="4824" b="2136">on</wd>

<space/>

<wd l="4886" t="1992" r="5362" b="2136">World</wd>

<space/>

<wd l="5414" t="1992" r="5803" b="2136">Wide</wd>

<space/>

</ln>

<ln l="1666" t="2213" r="5798" b="2400" baseLine="2352">

<wd l="1666" t="2213" r="2035" b="2381"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">Web</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="2126" t="2218" r="2688" b="2357">WWW</wd>

<space/>

<wd l="2784" t="2213" r="3077" b="2381">’10,</wd>

<space/>

<wd l="3163" t="2256" r="3614" b="2400">pages</wd>

<space/>

<wd l="3706" t="2213" r="4435" b="2381">851–860,</wd>

<space/>

<wd l="4526" t="2218" r="4891" b="2357">New</wd>

<space/>

<wd l="4973" t="2213" r="5405" b="2381">York,</wd>

<space/>

<wd l="5496" t="2218" r="5798" b="2381">NY,</wd>

<space/>

</run>

</ln>

<ln l="1656" t="2434" r="2640" b="2578" baseLine="2568" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1656" t="2434" r="2093" b="2578">USA.</wd>

<space/>

<wd l="2155" t="2434" r="2640" b="2578">ACM.</wd>

</ln>

</para>

<para l="1440" t="2832" r="5808" b="3677" alignment="justified" li="216" spaceBefore="182" spaceAfter="12292" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="1440" t="2832" r="5808" b="3019" baseLine="2966" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1440" t="2832" r="1627" b="2971">Yi</wd>

<space/>

<wd l="1699" t="2837" r="2107" b="3019">Yang</wd>

<space/>

<wd l="2189" t="2832" r="2477" b="2976">and</wd>

<space/>

<wd l="2544" t="2832" r="2995" b="2976">Jacob</wd>

<space/>

<wd l="3072" t="2832" r="3931" b="2976">Eisenstein.</wd>

<space/>

<wd l="4094" t="2832" r="4526" b="2976">2013.</wd>

<space/>

<wd l="4680" t="2837" r="4824" b="2971">A</wd>

<space/>

<wd l="4896" t="2832" r="5808" b="3019">Log-Linear</wd>

<space/>

</ln>

<ln l="1656" t="3048" r="5798" b="3230" baseLine="3187">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1656" t="3048" r="2174" b="3192">Model</wd>

<space/>

<wd l="2246" t="3048" r="2477" b="3192">for</wd>

<space/>

<wd l="2544" t="3048" r="3643" b="3230">Unsupervised</wd>

<space/>

<wd l="3706" t="3053" r="4056" b="3192">Text</wd>

<space/>

<wd l="4123" t="3048" r="5314" b="3192">Normalization.</wd>

<space/>

</run>

<wd l="5448" t="3053" r="5798" b="3192" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Pro-</wd>

</ln>

<ln l="1661" t="3269" r="5798" b="3451" baseLine="3408" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1661" t="3269" r="2357" b="3451">ceedings</wd>

<space/>

<wd l="2414" t="3269" r="2597" b="3451">of</wd>

<space/>

<wd l="2621" t="3269" r="2861" b="3413">the</wd>

<space/>

<wd l="2909" t="3269" r="3715" b="3451">Empirical</wd>

<space/>

<wd l="3758" t="3269" r="4450" b="3413">Methods</wd>

<space/>

<wd l="4512" t="3317" r="4699" b="3413">on</wd>

<space/>

<wd l="4752" t="3269" r="5376" b="3413">Natural</wd>

<space/>

<wd l="5424" t="3274" r="5798" b="3413">Lan-</wd>

</ln>

<ln l="1656" t="3490" r="5045" b="3677" baseLine="3624">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1656" t="3538" r="2141" b="3672">guage</wd>

<space/>

<wd l="2189" t="3494" r="3062" b="3672">Processing</wd>

<space/>

</run>

<wd l="3120" t="3494" r="3946" b="3667"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(EMNLP)</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="4003" t="3533" r="4450" b="3677">pages</wd>

<space/>

<wd l="4512" t="3490" r="5045" b="3634">61–72.</wd>

</run>

</ln>

</para>

</column>

</section>

<dd l="5820" t="15736" r="6224" b="15980">

<para l="5820" t="15787" r="6191" b="15946" alignment="left" spaceBefore="4" lsp="exactly" lspExact="229" language="en">

<ln l="5837" t="15787" r="6125" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="-12">

<wd l="5837" t="15787" r="6125" b="15946">125</wd>

</ln>

</para>

</dd>

</body>

</page>

</document>

