<?xml version="1.0" encoding="UTF-16"?>

<!--XML document generated using OCR technology from Nuance Communications, Inc.-->

<document xmlns="http://www.scansoft.com/omnipage/xml/ssdoc-schema3.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4313.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1421" marginTop="1420" marginRight="1402" marginBottom="358" offsetX="-6" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1421" t="1420" r="10507" b="5266">

<column l="1421" t="1420" r="10507" b="5266">

<para l="1488" t="1493" r="10435" b="2112" alignment="left" li="2376" ri="72" spaceBefore="13" fli="-2304" lsp="exactly" lspExact="346" language="en">

<ln l="1488" t="1493" r="10435" b="1766" baseLine="1699" bold="true" underlined="none" subsuperscript="none" fontSize="1500" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1488" t="1498" r="3749" b="1766">NCSU-SAS-Ning:</wd>

<space/>

<wd l="3854" t="1498" r="5174" b="1704">Candidate</wd>

<space/>

<wd l="5266" t="1498" r="6696" b="1704">Generation</wd>

<space/>

<wd l="6787" t="1498" r="7258" b="1704">and</wd>

<space/>

<wd l="7344" t="1498" r="8333" b="1704">Feature</wd>

<space/>

<wd l="8419" t="1498" r="9974" b="1766">Engineering</wd>

<space/>

<wd l="10066" t="1498" r="10435" b="1704">for</wd>

<space/>

</ln>

<ln l="3802" t="1838" r="8126" b="2112" baseLine="2045" bold="true" underlined="none" subsuperscript="none" fontSize="1500" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="3802" t="1843" r="5198" b="2112">Supervised</wd>

<space/>

<wd l="5285" t="1843" r="6211" b="2050">Lexical</wd>

<space/>

<wd l="6288" t="1843" r="8126" b="2050">Normalization</wd>

</ln>

</para>

<para l="4814" t="2976" r="7114" b="4330" alignment="centered" spaceBefore="785" spaceAfter="926" lsp="exactly" lspExact="284" language="en">

<ln l="5525" t="2976" r="6394" b="3197" baseLine="3144" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Courier New" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="5525" t="2981" r="6019" b="3197">Ning</wd>

<space/>

<wd l="6082" t="2981" r="6394" b="3149">Jin
</wd>

</ln>

<ln l="4968" t="3254" r="6955" b="3475" baseLine="3422" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Courier New" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="4968" t="3259" r="5405" b="3427">Text</wd>

<space/>

<wd l="5462" t="3254" r="6374" b="3475">Analytics</wd>

<space/>

<wd l="6442" t="3259" r="6955" b="3427">R&amp;D
</wd>

</ln>

<ln l="5093" t="3528" r="6830" b="3739" baseLine="3696" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Courier New" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="5093" t="3533" r="5506" b="3701">SAS</wd>

<space/>

<wd l="5582" t="3528" r="6413" b="3739">Institute,</wd>

<space/>

<wd l="6490" t="3533" r="6830" b="3701">Inc.
</wd>

</ln>

<ln l="5208" t="3811" r="6720" b="4027" baseLine="3974" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Courier New" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="5208" t="3811" r="5717" b="4027">Cary,</wd>

<space/>

<wd l="5784" t="3811" r="6173" b="4018">NC,</wd>

<space/>

<wd l="6245" t="3811" r="6720" b="3979">USA
</wd>

</ln>

<ln l="4814" t="4128" r="7114" b="4330" baseLine="4282" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Courier New" fontFamily="modern" fontPitch="fixed" spacing="0" forcedEOF="true">

<wd l="4814" t="4128" r="7114" b="4330">Ning.Jin@sas.com</wd>

</ln>

</para>

</column>

</section>

<section l="1421" t="5266" r="10507" b="15297">

<column l="1421" t="5266" r="5798" b="15297">

<para l="3163" t="5558" r="4056" b="5726" alignment="centered" spaceBefore="245" lsp="exactly" lspExact="273" language="en">

<ln l="3163" t="5558" r="4056" b="5726" baseLine="5722" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="3163" t="5558" r="4056" b="5726">Abstract</wd>

</ln>

</para>

<para l="1762" t="6062" r="5453" b="9763" alignment="justified" li="288" ri="360" spaceBefore="235" lsp="exactly" lspExact="253" language="en">

<ln l="1766" t="6062" r="5443" b="6264" baseLine="6216" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1766" t="6072" r="2184" b="6221">User</wd>

<space/>

<wd l="2333" t="6062" r="3182" b="6264">generated</wd>

<space/>

<wd l="3331" t="6086" r="3979" b="6221">content</wd>

<space/>

<wd l="4123" t="6062" r="4574" b="6221">often</wd>

<space/>

<wd l="4723" t="6062" r="5443" b="6221">contains</wd>

<space/>

</ln>

<ln l="1766" t="6317" r="5448" b="6475" baseLine="6470" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1766" t="6317" r="2914" b="6475">non-standard</wd>

<space/>

<wd l="3000" t="6317" r="3528" b="6475">words</wd>

<space/>

<wd l="3624" t="6317" r="3955" b="6475">that</wd>

<space/>

<wd l="4037" t="6317" r="4603" b="6475">hinder</wd>

<space/>

<wd l="4690" t="6317" r="5448" b="6475">effective</wd>

<space/>

</ln>

<ln l="1771" t="6571" r="5438" b="6773" baseLine="6725" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1771" t="6571" r="2626" b="6730">automatic</wd>

<space/>

<wd l="2722" t="6595" r="3053" b="6730">text</wd>

<space/>

<wd l="3139" t="6571" r="4128" b="6773">processing.</wd>

<space/>

<wd l="4234" t="6581" r="4411" b="6725">In</wd>

<space/>

<wd l="4502" t="6571" r="4810" b="6730">this</wd>

<space/>

<wd l="4901" t="6624" r="5438" b="6773">paper,</wd>

<space/>

</ln>

<ln l="1766" t="6821" r="5443" b="7022" baseLine="6974" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1766" t="6874" r="2016" b="6979">we</wd>

<space/>

<wd l="2074" t="6845" r="2717" b="7022">present</wd>

<space/>

<wd l="2779" t="6874" r="2870" b="6979">a</wd>

<space/>

<wd l="2938" t="6845" r="3533" b="7022">system</wd>

<space/>

<wd l="3595" t="6874" r="3845" b="6979">we</wd>

<space/>

<wd l="3912" t="6821" r="4814" b="7022">developed</wd>

<space/>

<wd l="4872" t="6845" r="5030" b="6979">to</wd>

<space/>

<wd l="5093" t="6874" r="5443" b="7022">per-</wd>

</ln>

<ln l="1771" t="7075" r="5453" b="7277" baseLine="7229" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1771" t="7075" r="2189" b="7234">form</wd>

<space/>

<wd l="2328" t="7075" r="2909" b="7234">lexical</wd>

<space/>

<wd l="3038" t="7075" r="4262" b="7234">normalization</wd>

<space/>

<wd l="4397" t="7075" r="4651" b="7234">for</wd>

<space/>

<wd l="4781" t="7075" r="5453" b="7277">English</wd>

<space/>

</ln>

<ln l="1771" t="7330" r="5443" b="7531" baseLine="7483" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1771" t="7330" r="2419" b="7488">Twitter</wd>

<space/>

<wd l="2496" t="7354" r="2866" b="7488">text.</wd>

<space/>

<wd l="2966" t="7339" r="3096" b="7488">It</wd>

<space/>

<wd l="3182" t="7330" r="3533" b="7488">first</wd>

<space/>

<wd l="3614" t="7354" r="4435" b="7531">generates</wd>

<space/>

<wd l="4526" t="7330" r="5443" b="7488">candidates</wd>

<space/>

</ln>

<ln l="1762" t="7584" r="5448" b="7786" baseLine="7738" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1762" t="7584" r="2266" b="7742">based</wd>

<space/>

<wd l="2395" t="7637" r="2611" b="7742">on</wd>

<space/>

<wd l="2731" t="7608" r="3091" b="7786">past</wd>

<space/>

<wd l="3211" t="7584" r="4171" b="7786">knowledge</wd>

<space/>

<wd l="4306" t="7584" r="4618" b="7742">and</wd>

<space/>

<wd l="4747" t="7637" r="4838" b="7742">a</wd>

<space/>

<wd l="4963" t="7584" r="5448" b="7742">novel</wd>

<space/>

</ln>

<ln l="1776" t="7834" r="5453" b="8035" baseLine="7987" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1776" t="7834" r="2266" b="8035">string</wd>

<space/>

<wd l="2395" t="7834" r="3230" b="8035">similarity</wd>

<space/>

<wd l="3346" t="7858" r="4522" b="7992">measurement</wd>

<space/>

<wd l="4642" t="7834" r="4954" b="7992">and</wd>

<space/>

<wd l="5074" t="7834" r="5453" b="7992">then</wd>

<space/>

</ln>

<ln l="1776" t="8088" r="5453" b="8290" baseLine="8242" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1776" t="8088" r="2342" b="8246">selects</wd>

<space/>

<wd l="2424" t="8141" r="2515" b="8246">a</wd>

<space/>

<wd l="2592" t="8088" r="3422" b="8246">candidate</wd>

<space/>

<wd l="3494" t="8088" r="3970" b="8290">using</wd>

<space/>

<wd l="4046" t="8088" r="4728" b="8246">features</wd>

<space/>

<wd l="4810" t="8088" r="5453" b="8246">learned</wd>

<space/>

</ln>

<ln l="1771" t="8342" r="5443" b="8544" baseLine="8496" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1771" t="8342" r="2189" b="8501">from</wd>

<space/>

<wd l="2256" t="8342" r="2933" b="8544">training</wd>

<space/>

<wd l="3000" t="8342" r="3408" b="8501">data.</wd>

<space/>

<wd l="3485" t="8342" r="3816" b="8501">The</wd>

<space/>

<wd l="3888" t="8366" r="4488" b="8544">system</wd>

<space/>

<wd l="4550" t="8342" r="4834" b="8501">has</wd>

<space/>

<wd l="4906" t="8395" r="4997" b="8501">a</wd>

<space/>

<wd l="5064" t="8395" r="5443" b="8501">con-</wd>

</ln>

<ln l="1776" t="8592" r="5453" b="8750" baseLine="8746" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1776" t="8592" r="2462" b="8750">strained</wd>

<space/>

<wd l="2650" t="8592" r="3130" b="8750">mode</wd>

<space/>

<wd l="3326" t="8592" r="3638" b="8750">and</wd>

<space/>

<wd l="3830" t="8645" r="4027" b="8750">an</wd>

<space/>

<wd l="4219" t="8592" r="5453" b="8750">unconstrained</wd>

<space/>

</ln>

<ln l="1766" t="8846" r="5453" b="9048" baseLine="9000" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1766" t="8846" r="2299" b="9005">mode.</wd>

<space/>

<wd l="2381" t="8846" r="2712" b="9005">The</wd>

<space/>

<wd l="2784" t="8846" r="3797" b="9005">constrained</wd>

<space/>

<wd l="3859" t="8846" r="4344" b="9005">mode</wd>

<space/>

<wd l="4406" t="8846" r="5453" b="9048">participated</wd>

<space/>

</ln>

<ln l="1771" t="9101" r="5443" b="9302" baseLine="9254" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1771" t="9101" r="1939" b="9254">in</wd>

<space/>

<wd l="1992" t="9101" r="2256" b="9259">the</wd>

<space/>

<wd l="2318" t="9110" r="3048" b="9259">W-NUT</wd>

<space/>

<wd l="3106" t="9101" r="3586" b="9302">noisy</wd>

<space/>

<wd l="3638" t="9101" r="4315" b="9302">English</wd>

<space/>

<wd l="4368" t="9125" r="4699" b="9259">text</wd>

<space/>

<wd l="4757" t="9101" r="5443" b="9259">normal-</wd>

</ln>

<ln l="1771" t="9355" r="5443" b="9557" baseLine="9509" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1771" t="9355" r="2366" b="9514">ization</wd>

<space/>

<wd l="2424" t="9355" r="3470" b="9557">competition</wd>

<space/>

<wd l="3533" t="9355" r="4344" b="9557">(Baldwin</wd>

<space/>

<wd l="4406" t="9379" r="4560" b="9514">et</wd>

<space/>

<wd l="4618" t="9355" r="4872" b="9547">al.,</wd>

<space/>

<wd l="4939" t="9355" r="5443" b="9557">2015)</wd>

<space/>

</ln>

<ln l="1771" t="9605" r="4502" b="9763" baseLine="9758" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1771" t="9605" r="2083" b="9763">and</wd>

<space/>

<wd l="2146" t="9605" r="2923" b="9763">achieved</wd>

<space/>

<wd l="2976" t="9605" r="3240" b="9763">the</wd>

<space/>

<wd l="3293" t="9605" r="3653" b="9763">best</wd>

<space/>

<wd l="3710" t="9610" r="3912" b="9758">F1</wd>

<space/>

<wd l="4003" t="9658" r="4502" b="9763">score.</wd>

</ln>

</para>

<para l="1440" t="10109" r="3158" b="10277" alignment="left" spaceBefore="247" lsp="exactly" lspExact="273" language="en">

<ln l="1440" t="10109" r="3158" b="10277" baseLine="10272" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="15">

<wd l="1440" t="10109" r="1526" b="10272">1</wd>

<space/>

<wd l="1862" t="10109" r="3158" b="10277">Introduction</wd>

</ln>

</para>

<para l="1421" t="10536" r="5794" b="14530" alignment="justified" spaceBefore="156" lsp="exactly" lspExact="252" language="en">

<ln l="1426" t="10536" r="5784" b="10738" baseLine="10690" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="10546" r="1843" b="10694">User</wd>

<space/>

<wd l="1973" t="10536" r="2822" b="10738">generated</wd>

<space/>

<wd l="2952" t="10560" r="3638" b="10728">content,</wd>

<space/>

<wd l="3787" t="10536" r="4181" b="10694">such</wd>

<space/>

<wd l="4310" t="10589" r="4483" b="10694">as</wd>

<space/>

<wd l="4622" t="10560" r="5424" b="10694">customer</wd>

<space/>

<wd l="5549" t="10589" r="5784" b="10694">re-</wd>

</ln>

<ln l="1426" t="10786" r="5794" b="10987" baseLine="10939" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="10786" r="1982" b="10978">views,</wd>

<space/>

<wd l="2146" t="10786" r="2674" b="10944">forum</wd>

<space/>

<wd l="2827" t="10786" r="3869" b="10978">discussions,</wd>

<space/>

<wd l="4022" t="10810" r="4354" b="10944">text</wd>

<space/>

<wd l="4498" t="10838" r="5318" b="10987">messages</wd>

<space/>

<wd l="5477" t="10786" r="5794" b="10944">and</wd>

<space/>

</ln>

<ln l="1430" t="11040" r="5784" b="11242" baseLine="11194" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="11040" r="2078" b="11198">Twitter</wd>

<space/>

<wd l="2141" t="11064" r="2510" b="11232">text,</wd>

<space/>

<wd l="2592" t="11040" r="2726" b="11198">is</wd>

<space/>

<wd l="2808" t="11040" r="3010" b="11198">of</wd>

<space/>

<wd l="3053" t="11064" r="3490" b="11242">great</wd>

<space/>

<wd l="3557" t="11040" r="4027" b="11198">value</wd>

<space/>

<wd l="4104" t="11040" r="4267" b="11194">in</wd>

<space/>

<wd l="4339" t="11040" r="5386" b="11242">applications</wd>

<space/>

<wd l="5467" t="11040" r="5784" b="11198">like</wd>

<space/>

</ln>

<ln l="1426" t="11294" r="5794" b="11496" baseLine="11448" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="11294" r="2669" b="11496">understanding</wd>

<space/>

<wd l="2918" t="11347" r="3413" b="11486">users,</wd>

<space/>

<wd l="3672" t="11294" r="4128" b="11453">trend</wd>

<space/>

<wd l="4378" t="11294" r="5227" b="11496">discovery</wd>

<space/>

<wd l="5477" t="11294" r="5794" b="11453">and</wd>

<space/>

</ln>

<ln l="1430" t="11549" r="5784" b="11750" baseLine="11702" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="11549" r="2774" b="11750">crowdsourcing.</wd>

<space/>

<wd l="2938" t="11558" r="3245" b="11707">For</wd>

<space/>

<wd l="3398" t="11549" r="4181" b="11750">example,</wd>

<space/>

<wd l="4339" t="11549" r="4565" b="11750">by</wd>

<space/>

<wd l="4714" t="11549" r="5371" b="11750">reading</wd>

<space/>

<wd l="5525" t="11549" r="5784" b="11707">the</wd>

<space/>

</ln>

<ln l="1430" t="11798" r="5794" b="12000" baseLine="11952" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="11798" r="2078" b="11957">Twitter</wd>

<space/>

<wd l="2184" t="11822" r="2515" b="11957">text</wd>

<space/>

<wd l="2616" t="11798" r="3197" b="12000">posted</wd>

<space/>

<wd l="3298" t="11798" r="3523" b="12000">by</wd>

<space/>

<wd l="3634" t="11851" r="3725" b="11957">a</wd>

<space/>

<wd l="3835" t="11851" r="4248" b="11990">user,</wd>

<space/>

<wd l="4373" t="11851" r="4464" b="11957">a</wd>

<space/>

<wd l="4574" t="11851" r="5381" b="12000">company</wd>

<space/>

<wd l="5491" t="11851" r="5794" b="11957">can</wd>

<space/>

</ln>

<ln l="1430" t="12053" r="5789" b="12254" baseLine="12206" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="12053" r="1867" b="12211">learn</wd>

<space/>

<wd l="1939" t="12053" r="2198" b="12211">the</wd>

<space/>

<wd l="2280" t="12058" r="2794" b="12211">user’s</wd>

<space/>

<wd l="2870" t="12053" r="3878" b="12254">preferences</wd>

<space/>

<wd l="3965" t="12053" r="4277" b="12211">and</wd>

<space/>

<wd l="4354" t="12053" r="5390" b="12211">connections</wd>

<space/>

<wd l="5477" t="12053" r="5789" b="12211">and</wd>

<space/>

</ln>

<ln l="1426" t="12307" r="5794" b="12509" baseLine="12461" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="12360" r="1714" b="12466">use</wd>

<space/>

<wd l="1795" t="12307" r="2059" b="12466">the</wd>

<space/>

<wd l="2146" t="12307" r="3182" b="12466">information</wd>

<space/>

<wd l="3264" t="12307" r="3518" b="12466">for</wd>

<space/>

<wd l="3590" t="12307" r="4301" b="12509">targeted</wd>

<space/>

<wd l="4382" t="12307" r="5395" b="12509">advertising.</wd>

<space/>

<wd l="5486" t="12317" r="5794" b="12466">For</wd>

<space/>

</ln>

<ln l="1430" t="12557" r="5794" b="12758" baseLine="12710" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="12557" r="2088" b="12715">another</wd>

<space/>

<wd l="2184" t="12557" r="2971" b="12758">example,</wd>

<space/>

<wd l="3072" t="12557" r="3298" b="12758">by</wd>

<space/>

<wd l="3389" t="12557" r="4046" b="12758">reading</wd>

<space/>

<wd l="4147" t="12562" r="4891" b="12715">Amazon</wd>

<space/>

<wd l="4992" t="12581" r="5794" b="12715">customer</wd>

<space/>

</ln>

<ln l="1426" t="12811" r="5794" b="13013" baseLine="12965" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="12811" r="2102" b="12970">reviews</wd>

<space/>

<wd l="2213" t="12811" r="2698" b="12970">about</wd>

<space/>

<wd l="2794" t="12864" r="2885" b="12970">a</wd>

<space/>

<wd l="2990" t="12811" r="3581" b="12970">certain</wd>

<space/>

<wd l="3672" t="12811" r="4392" b="13013">product,</wd>

<space/>

<wd l="4507" t="12864" r="4598" b="12970">a</wd>

<space/>

<wd l="4704" t="12811" r="5395" b="13013">shopper</wd>

<space/>

<wd l="5491" t="12864" r="5794" b="12970">can</wd>

<space/>

</ln>

<ln l="1430" t="13066" r="5794" b="13267" baseLine="13219" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="13066" r="2016" b="13224">collect</wd>

<space/>

<wd l="2117" t="13118" r="2208" b="13224">a</wd>

<space/>

<wd l="2314" t="13066" r="2544" b="13224">lot</wd>

<space/>

<wd l="2645" t="13066" r="2846" b="13224">of</wd>

<space/>

<wd l="2918" t="13066" r="3600" b="13267">product</wd>

<space/>

<wd l="3701" t="13066" r="4738" b="13224">information</wd>

<space/>

<wd l="4834" t="13066" r="5165" b="13224">that</wd>

<space/>

<wd l="5270" t="13066" r="5400" b="13224">is</wd>

<space/>

<wd l="5510" t="13090" r="5794" b="13224">not</wd>

<space/>

</ln>

<ln l="1430" t="13320" r="5784" b="13478" baseLine="13474" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="13320" r="2213" b="13478">available</wd>

<space/>

<wd l="2318" t="13320" r="2736" b="13478">from</wd>

<space/>

<wd l="2832" t="13320" r="4080" b="13478">manufacturers</wd>

<space/>

<wd l="4186" t="13320" r="4502" b="13478">and</wd>

<space/>

<wd l="4594" t="13320" r="5347" b="13478">retailers.</wd>

<space/>

<wd l="5448" t="13330" r="5784" b="13478">Un-</wd>

</ln>

<ln l="1430" t="13570" r="5784" b="13771" baseLine="13723" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="13570" r="2434" b="13771">fortunately,</wd>

<space/>

<wd l="2506" t="13622" r="2875" b="13728">user</wd>

<space/>

<wd l="2933" t="13570" r="3782" b="13771">generated</wd>

<space/>

<wd l="3845" t="13594" r="4493" b="13728">content</wd>

<space/>

<wd l="4550" t="13570" r="5002" b="13728">often</wd>

<space/>

<wd l="5064" t="13570" r="5784" b="13728">contains</wd>

<space/>

</ln>

<ln l="1426" t="13824" r="5784" b="14026" baseLine="13978" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="13824" r="2741" b="14026">ungrammatical</wd>

<space/>

<wd l="2933" t="13848" r="3677" b="13982">sentence</wd>

<space/>

<wd l="3869" t="13848" r="4709" b="13982">structures</wd>

<space/>

<wd l="4901" t="13824" r="5213" b="13982">and</wd>

<space/>

<wd l="5390" t="13877" r="5784" b="13982">non-</wd>

</ln>

<ln l="1435" t="14078" r="5794" b="14270" baseLine="14232" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1435" t="14078" r="2174" b="14237">standard</wd>

<space/>

<wd l="2290" t="14078" r="2870" b="14270">words,</wd>

<space/>

<wd l="3005" t="14078" r="3542" b="14237">which</wd>

<space/>

<wd l="3658" t="14078" r="4301" b="14237">hinders</wd>

<space/>

<wd l="4430" t="14078" r="5342" b="14237">automated</wd>

<space/>

<wd l="5462" t="14102" r="5794" b="14237">text</wd>

<space/>

</ln>

<ln l="1421" t="14328" r="2410" b="14530" baseLine="14482" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="14328" r="2410" b="14530">processing.</wd>

</ln>

</para>

<para l="1426" t="14582" r="5794" b="15288" alignment="justified" fli="216" lsp="exactly" lspExact="251" language="en">

<ln l="1656" t="14582" r="5784" b="14784" baseLine="14736" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1656" t="14592" r="1838" b="14736">In</wd>

<space/>

<wd l="1939" t="14582" r="2251" b="14741">this</wd>

<space/>

<wd l="2357" t="14635" r="2894" b="14784">paper,</wd>

<space/>

<wd l="3010" t="14635" r="3259" b="14741">we</wd>

<space/>

<wd l="3365" t="14606" r="4008" b="14784">present</wd>

<space/>

<wd l="4114" t="14635" r="4205" b="14741">a</wd>

<space/>

<wd l="4320" t="14582" r="5021" b="14741">solution</wd>

<space/>

<wd l="5126" t="14582" r="5458" b="14741">that</wd>

<space/>

<wd l="5563" t="14606" r="5784" b="14741">at-</wd>

</ln>

<ln l="1426" t="14837" r="5794" b="15038" baseLine="14990" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="14861" r="2006" b="15038">tempts</wd>

<space/>

<wd l="2093" t="14861" r="2256" b="14995">to</wd>

<space/>

<wd l="2342" t="14837" r="3053" b="15038">perform</wd>

<space/>

<wd l="3144" t="14837" r="3720" b="14995">lexical</wd>

<space/>

<wd l="3806" t="14837" r="5030" b="14995">normalization</wd>

<space/>

<wd l="5117" t="14837" r="5544" b="15038">(Han</wd>

<space/>

<wd l="5635" t="14861" r="5794" b="14995">et</wd>

<space/>

</ln>

<ln l="1430" t="15086" r="5784" b="15288" baseLine="15240" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="15086" r="1685" b="15278">al.,</wd>

<space/>

<wd l="1757" t="15086" r="2261" b="15288">2011)</wd>

<space/>

<wd l="2333" t="15086" r="2587" b="15245">for</wd>

<space/>

<wd l="2645" t="15086" r="3317" b="15288">English</wd>

<space/>

<wd l="3379" t="15086" r="4027" b="15245">Twitter</wd>

<space/>

<wd l="4085" t="15110" r="4416" b="15245">text</wd>

<space/>

<wd l="4469" t="15086" r="4978" b="15245">based</wd>

<space/>

<wd l="5040" t="15139" r="5256" b="15245">on</wd>

<space/>

<wd l="5314" t="15086" r="5784" b="15245">train-</wd>

</ln>

<ln l="0" t="0" r="0" b="0" baseLine="0" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<space/>

</ln>

</para>

</column>

<column l="6130" t="5266" r="10507" b="14824">

<para l="6134" t="5309" r="10502" b="7493" alignment="justified" spaceBefore="1" lsp="exactly" lspExact="252" language="en">

<ln l="6139" t="5309" r="10488" b="5510" baseLine="5462" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="5309" r="6413" b="5510">ing</wd>

<space/>

<wd l="6499" t="5333" r="6830" b="5467">text</wd>

<space/>

<wd l="6917" t="5309" r="7306" b="5467">with</wd>

<space/>

<wd l="7392" t="5309" r="7982" b="5467">human</wd>

<space/>

<wd l="8078" t="5309" r="9000" b="5467">annotation</wd>

<space/>

<wd l="9091" t="5309" r="9907" b="5510">(Baldwin</wd>

<space/>

<wd l="9994" t="5333" r="10147" b="5467">et</wd>

<space/>

<wd l="10238" t="5309" r="10488" b="5501">al.,</wd>

<space/>

</ln>

<ln l="6134" t="5563" r="10502" b="5765" baseLine="5717" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="5563" r="6691" b="5765">2015).</wd>

<space/>

<wd l="6782" t="5563" r="7114" b="5722">The</wd>

<space/>

<wd l="7210" t="5563" r="7906" b="5722">solution</wd>

<space/>

<wd l="7982" t="5563" r="8266" b="5722">has</wd>

<space/>

<wd l="8357" t="5616" r="8448" b="5722">a</wd>

<space/>

<wd l="8530" t="5563" r="9542" b="5722">constrained</wd>

<space/>

<wd l="9619" t="5563" r="10099" b="5722">mode</wd>

<space/>

<wd l="10186" t="5563" r="10502" b="5722">and</wd>

<space/>

</ln>

<ln l="6139" t="5813" r="10493" b="5971" baseLine="5966" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="5866" r="6346" b="5971">an</wd>

<space/>

<wd l="6456" t="5813" r="7694" b="5971">unconstrained</wd>

<space/>

<wd l="7810" t="5813" r="8342" b="5971">mode.</wd>

<space/>

<wd l="8467" t="5813" r="8894" b="5971">Both</wd>

<space/>

<wd l="9010" t="5813" r="9576" b="5971">modes</wd>

<space/>

<wd l="9701" t="5813" r="10109" b="5971">have</wd>

<space/>

<wd l="10234" t="5813" r="10493" b="5971">the</wd>

<space/>

</ln>

<ln l="6144" t="6067" r="10493" b="6269" baseLine="6221" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="6120" r="6581" b="6226">same</wd>

<space/>

<wd l="6667" t="6067" r="7694" b="6226">architecture</wd>

<space/>

<wd l="7781" t="6067" r="8093" b="6226">and</wd>

<space/>

<wd l="8174" t="6091" r="9274" b="6269">components.</wd>

<space/>

<wd l="9360" t="6067" r="9787" b="6226">Both</wd>

<space/>

<wd l="9864" t="6120" r="10152" b="6226">use</wd>

<space/>

<wd l="10234" t="6067" r="10493" b="6226">the</wd>

<space/>

</ln>

<ln l="6139" t="6322" r="10493" b="6523" baseLine="6475">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6139" t="6322" r="6989" b="6480">annotated</wd>

<space/>

<wd l="7051" t="6322" r="7733" b="6523">training</wd>

<space/>

<wd l="7805" t="6322" r="8165" b="6480">data</wd>

<space/>

<wd l="8237" t="6322" r="8549" b="6480">and</wd>

<space/>

<wd l="8616" t="6326" r="9264" b="6480">CMU’s</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><wd l="9341" t="6322" r="9638" b="6480">ark</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="9691" t="6326" r="10085" b="6480">POS</wd>

<space/>

<wd l="10157" t="6346" r="10493" b="6523">tag-</wd>

</run>

</ln>

<ln l="6139" t="6571" r="10502" b="6773" baseLine="6725" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="6624" r="6418" b="6773">ger</wd>

<space/>

<wd l="6475" t="6571" r="7200" b="6773">(Gimpel</wd>

<space/>

<wd l="7267" t="6595" r="7421" b="6730">et</wd>

<space/>

<wd l="7478" t="6571" r="7733" b="6763">al.,</wd>

<space/>

<wd l="7800" t="6571" r="8357" b="6773">2011).</wd>

<space/>

<wd l="8429" t="6571" r="8760" b="6730">The</wd>

<space/>

<wd l="8827" t="6571" r="9706" b="6730">difference</wd>

<space/>

<wd l="9763" t="6571" r="10502" b="6730">between</wd>

<space/>

</ln>

<ln l="6134" t="6826" r="10493" b="7027" baseLine="6979" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="6826" r="6571" b="6984">them</wd>

<space/>

<wd l="6643" t="6826" r="6778" b="6984">is</wd>

<space/>

<wd l="6850" t="6850" r="7738" b="7027">parameter</wd>

<space/>

<wd l="7810" t="6826" r="8462" b="7027">settings</wd>

<space/>

<wd l="8539" t="6826" r="8851" b="6984">and</wd>

<space/>

<wd l="8918" t="6826" r="9182" b="6984">the</wd>

<space/>

<wd l="9254" t="6878" r="9749" b="7027">usage</wd>

<space/>

<wd l="9826" t="6826" r="10022" b="6984">of</wd>

<space/>

<wd l="10070" t="6878" r="10162" b="6984">a</wd>

<space/>

<wd l="10234" t="6878" r="10493" b="6984">ca-</wd>

</ln>

<ln l="6134" t="7080" r="10502" b="7282" baseLine="7234" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="7080" r="6778" b="7238">nonical</wd>

<space/>

<wd l="6878" t="7080" r="7522" b="7238">lexicon</wd>

<space/>

<wd l="7618" t="7080" r="8506" b="7282">dictionary</wd>

<space/>

<wd l="8592" t="7080" r="8818" b="7282">by</wd>

<space/>

<wd l="8904" t="7080" r="9168" b="7238">the</wd>

<space/>

<wd l="9264" t="7080" r="10502" b="7238">unconstrained</wd>

<space/>

</ln>

<ln l="6134" t="7334" r="6667" b="7493" baseLine="7488" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="7334" r="6667" b="7493">mode.</wd>

</ln>

</para>

<para l="6130" t="7584" r="10502" b="9557" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="6365" t="7584" r="10493" b="7786" baseLine="7738" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6365" t="7584" r="6744" b="7742">This</wd>

<space/>

<wd l="6830" t="7637" r="7325" b="7786">paper</wd>

<space/>

<wd l="7411" t="7584" r="7541" b="7742">is</wd>

<space/>

<wd l="7637" t="7584" r="8501" b="7786">organized</wd>

<space/>

<wd l="8587" t="7637" r="8755" b="7742">as</wd>

<space/>

<wd l="8851" t="7584" r="9552" b="7742">follows:</wd>

<space/>

<wd l="9658" t="7584" r="10310" b="7742">Section</wd>

<space/>

<wd l="10392" t="7589" r="10493" b="7738">2</wd>

<space/>

</ln>

<ln l="6139" t="7838" r="10502" b="8040" baseLine="7992" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6139" t="7838" r="6946" b="7997">describes</wd>

<space/>

<wd l="7013" t="7838" r="7272" b="7997">the</wd>

<space/>

<wd l="7339" t="7838" r="8366" b="7997">architecture</wd>

<space/>

<wd l="8438" t="7838" r="8750" b="7997">and</wd>

<space/>

<wd l="8813" t="7862" r="9859" b="8040">components</wd>

<space/>

<wd l="9936" t="7838" r="10502" b="7997">shared</wd>

<space/>

</ln>

<ln l="6130" t="8093" r="10488" b="8294" baseLine="8246" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="8093" r="6355" b="8294">by</wd>

<space/>

<wd l="6490" t="8093" r="6754" b="8251">the</wd>

<space/>

<wd l="6902" t="8093" r="7910" b="8251">constrained</wd>

<space/>

<wd l="8050" t="8093" r="8366" b="8251">and</wd>

<space/>

<wd l="8501" t="8093" r="9734" b="8251">unconstrained</wd>

<space/>

<wd l="9869" t="8093" r="10488" b="8251">modes.</wd>

<space/>

</ln>

<ln l="6144" t="8342" r="10502" b="8544" baseLine="8496" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6144" t="8342" r="6797" b="8501">Section</wd>

<space/>

<wd l="6888" t="8347" r="6974" b="8501">3</wd>

<space/>

<wd l="7080" t="8342" r="7421" b="8501">lists</wd>

<space/>

<wd l="7517" t="8342" r="7944" b="8501">what</wd>

<space/>

<wd l="8030" t="8395" r="8851" b="8501">resources</wd>

<space/>

<wd l="8952" t="8395" r="9211" b="8501">are</wd>

<space/>

<wd l="9302" t="8342" r="9706" b="8501">used</wd>

<space/>

<wd l="9787" t="8342" r="10013" b="8544">by</wd>

<space/>

<wd l="10104" t="8342" r="10502" b="8501">each</wd>

<space/>

</ln>

<ln l="6144" t="8597" r="10502" b="8798" baseLine="8750" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6144" t="8621" r="6792" b="8798">system.</wd>

<space/>

<wd l="6902" t="8606" r="7080" b="8750">In</wd>

<space/>

<wd l="7186" t="8597" r="7838" b="8755">Section</wd>

<space/>

<wd l="7934" t="8602" r="8088" b="8789">4,</wd>

<space/>

<wd l="8194" t="8650" r="8448" b="8755">we</wd>

<space/>

<wd l="8554" t="8597" r="9274" b="8755">describe</wd>

<space/>

<wd l="9379" t="8597" r="9638" b="8755">the</wd>

<space/>

<wd l="9749" t="8597" r="10502" b="8755">different</wd>

<space/>

</ln>

<ln l="6144" t="8851" r="10502" b="9053" baseLine="9005" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6144" t="8851" r="6797" b="9053">settings</wd>

<space/>

<wd l="6946" t="8851" r="7147" b="9010">of</wd>

<space/>

<wd l="7262" t="8851" r="7522" b="9010">the</wd>

<space/>

<wd l="7670" t="8851" r="8678" b="9010">constrained</wd>

<space/>

<wd l="8818" t="8851" r="9134" b="9010">and</wd>

<space/>

<wd l="9269" t="8851" r="10502" b="9010">unconstrained</wd>

<space/>

</ln>

<ln l="6134" t="9101" r="10488" b="9302" baseLine="9254" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6134" t="9101" r="6701" b="9259">modes</wd>

<space/>

<wd l="6778" t="9101" r="7090" b="9259">and</wd>

<space/>

<wd l="7157" t="9154" r="7906" b="9302">compare</wd>

<space/>

<wd l="7973" t="9101" r="8381" b="9259">their</wd>

<space/>

<wd l="8434" t="9101" r="9595" b="9302">performance.</wd>

<space/>

<wd l="9677" t="9101" r="10330" b="9259">Section</wd>

<space/>

<wd l="10402" t="9110" r="10488" b="9259">5</wd>

<space/>

</ln>

<ln l="6139" t="9355" r="10258" b="9557" baseLine="9509" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6139" t="9355" r="7008" b="9514">concludes</wd>

<space/>

<wd l="7070" t="9355" r="7334" b="9514">the</wd>

<space/>

<wd l="7387" t="9408" r="7886" b="9557">paper</wd>

<space/>

<wd l="7944" t="9355" r="8256" b="9514">and</wd>

<space/>

<wd l="8314" t="9355" r="9120" b="9514">discusses</wd>

<space/>

<wd l="9187" t="9355" r="9701" b="9514">future</wd>

<space/>

<wd l="9763" t="9355" r="10258" b="9514">work.</wd>

</ln>

</para>

<para l="6139" t="9821" r="10493" b="10310" alignment="justified" li="432" spaceBefore="207" fli="-432" lsp="exactly" lspExact="274" language="en">

<ln l="6139" t="9821" r="10493" b="10037" baseLine="9984" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="9821" r="6245" b="9984">2</wd>

<space/>

<wd l="6566" t="9821" r="7867" b="9989">Architecture</wd>

<space/>

<wd l="7987" t="9821" r="8362" b="9989">and</wd>

<space/>

<wd l="8482" t="9821" r="9758" b="10037">Components</wd>

<space/>

<wd l="9878" t="9821" r="10094" b="9989">of</wd>

<space/>

<wd l="10181" t="9821" r="10493" b="9989">the</wd>

<space/>

</ln>

<ln l="6576" t="10094" r="7296" b="10310" baseLine="10258" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6576" t="10094" r="7296" b="10310">System</wd>

</ln>

</para>

<para l="6134" t="10493" r="10502" b="13714" alignment="justified" spaceBefore="156" lsp="exactly" lspExact="253" language="en">

<ln l="6139" t="10493" r="10488" b="10723" baseLine="10676">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6139" t="10522" r="6672" b="10680">Given</wd>

<space/>

<wd l="6763" t="10574" r="6854" b="10680">a</wd>

<space/>

<wd l="6941" t="10522" r="7795" b="10680">tokenized</wd>

<space/>

<wd l="7877" t="10522" r="8554" b="10723">English</wd>

<space/>

<wd l="8635" t="10546" r="9115" b="10680">tweet</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><wd l="9211" t="10531" r="9341" b="10675">T</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="9403" t="10579" r="9523" b="10627">=</wd>

<space/>

</run>

<wd l="9614" t="10522" r="9859" b="10723"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="600" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">1</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="9960" t="10550" r="10128" b="10714"><run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="600" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">2</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="10248" t="10651" r="10488" b="10714">…,</wd>

<space/>

</run>

</ln>

<ln l="6139" t="10776" r="10498" b="10978" baseLine="10931">

<wd l="6139" t="10776" r="6384" b="10978"><run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">t</run>

<run italic="true" underlined="none" subsuperscript="subscript" fontSize="900" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">n</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">),</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="6490" t="10776" r="7018" b="10934">where</wd>

<space/>

</run>

<wd l="7123" t="10805" r="7219" b="10963"><run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="600" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">i</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="600" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7315" t="10776" r="7450" b="10934">is</wd>

<space/>

<wd l="7550" t="10776" r="7814" b="10934">the</wd>

<space/>

</run>

<wd l="7920" t="10776" r="8218" b="10934"><run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">i</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">-th</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="8314" t="10776" r="8798" b="10934">token</wd>

<space/>

<wd l="8899" t="10776" r="9211" b="10934">and</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><wd l="9302" t="10834" r="9403" b="10934">n</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="9514" t="10776" r="9643" b="10934">is</wd>

<space/>

<wd l="9749" t="10776" r="10008" b="10934">the</wd>

<space/>

<wd l="10109" t="10776" r="10498" b="10934">total</wd>

<space/>

</run>

</ln>

<ln l="6134" t="11026" r="10493" b="11227" baseLine="11179" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="11026" r="6811" b="11184">number</wd>

<space/>

<wd l="6878" t="11026" r="7080" b="11184">of</wd>

<space/>

<wd l="7118" t="11026" r="7738" b="11218">tokens,</wd>

<space/>

<wd l="7819" t="11078" r="8112" b="11184">our</wd>

<space/>

<wd l="8170" t="11026" r="9394" b="11184">normalization</wd>

<space/>

<wd l="9466" t="11050" r="10066" b="11227">system</wd>

<space/>

<wd l="10128" t="11078" r="10493" b="11227">pro-</wd>

</ln>

<ln l="6139" t="11280" r="10493" b="11482" baseLine="11434" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="11333" r="6677" b="11438">cesses</wd>

<space/>

<wd l="6773" t="11333" r="7080" b="11438">one</wd>

<space/>

<wd l="7166" t="11280" r="7656" b="11438">token</wd>

<space/>

<wd l="7747" t="11304" r="7901" b="11438">at</wd>

<space/>

<wd l="7987" t="11333" r="8078" b="11438">a</wd>

<space/>

<wd l="8165" t="11280" r="8549" b="11438">time</wd>

<space/>

<wd l="8645" t="11280" r="8957" b="11438">and</wd>

<space/>

<wd l="9038" t="11280" r="9322" b="11438">has</wd>

<space/>

<wd l="9413" t="11304" r="9734" b="11438">two</wd>

<space/>

<wd l="9830" t="11333" r="10493" b="11482">compo-</wd>

</ln>

<ln l="6134" t="11534" r="10493" b="11736" baseLine="11688" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="11558" r="6643" b="11693">nents:</wd>

<space/>

<wd l="6739" t="11534" r="7570" b="11693">candidate</wd>

<space/>

<wd l="7651" t="11534" r="8578" b="11736">generation</wd>

<space/>

<wd l="8654" t="11534" r="8966" b="11693">and</wd>

<space/>

<wd l="9043" t="11534" r="9874" b="11693">candidate</wd>

<space/>

<wd l="9955" t="11534" r="10493" b="11693">evalu-</wd>

</ln>

<ln l="6139" t="11784" r="10493" b="11986" baseLine="11938">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6139" t="11784" r="6619" b="11942">ation.</wd>

<space/>

<wd l="6696" t="11794" r="6926" b="11942">To</wd>

<space/>

<wd l="6994" t="11784" r="7867" b="11942">normalize</wd>

<space/>

<wd l="7934" t="11784" r="8424" b="11942">token</wd>

<space/>

</run>

<wd l="8486" t="11813" r="8626" b="11976"><run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="600" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">i</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="8698" t="11784" r="8957" b="11942">the</wd>

<space/>

<wd l="9034" t="11808" r="9634" b="11986">system</wd>

<space/>

<wd l="9701" t="11784" r="10051" b="11942">first</wd>

<space/>

<wd l="10114" t="11837" r="10493" b="11986">gen-</wd>

</run>

</ln>

<ln l="6139" t="12038" r="10488" b="12197" baseLine="12192" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="12062" r="6638" b="12197">erates</wd>

<space/>

<wd l="6739" t="12091" r="6830" b="12197">a</wd>

<space/>

<wd l="6926" t="12038" r="7387" b="12197">small</wd>

<space/>

<wd l="7488" t="12062" r="7723" b="12197">set</wd>

<space/>

<wd l="7814" t="12038" r="8011" b="12197">of</wd>

<space/>

<wd l="8078" t="12038" r="8914" b="12197">candidate</wd>

<space/>

<wd l="9010" t="12038" r="9845" b="12197">canonical</wd>

<space/>

<wd l="9936" t="12038" r="10488" b="12197">forms.</wd>

<space/>

</ln>

<ln l="6139" t="12293" r="10502" b="12451" baseLine="12446" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="12293" r="6586" b="12451">Then</wd>

<space/>

<wd l="6696" t="12293" r="6816" b="12451">it</wd>

<space/>

<wd l="6926" t="12293" r="7781" b="12451">calculates</wd>

<space/>

<wd l="7896" t="12346" r="7987" b="12451">a</wd>

<space/>

<wd l="8102" t="12293" r="9053" b="12451">confidence</wd>

<space/>

<wd l="9178" t="12346" r="9624" b="12451">score</wd>

<space/>

<wd l="9739" t="12293" r="9994" b="12451">for</wd>

<space/>

<wd l="10104" t="12293" r="10502" b="12451">each</wd>

<space/>

</ln>

<ln l="6139" t="12542" r="10502" b="12744" baseLine="12696" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="12542" r="6970" b="12701">candidate</wd>

<space/>

<wd l="7090" t="12542" r="7402" b="12701">and</wd>

<space/>

<wd l="7517" t="12542" r="8083" b="12701">selects</wd>

<space/>

<wd l="8198" t="12542" r="8458" b="12701">the</wd>

<space/>

<wd l="8578" t="12595" r="8885" b="12701">one</wd>

<space/>

<wd l="8995" t="12542" r="9384" b="12701">with</wd>

<space/>

<wd l="9490" t="12542" r="9754" b="12701">the</wd>

<space/>

<wd l="9864" t="12542" r="10502" b="12744">highest</wd>

<space/>

</ln>

<ln l="6139" t="12797" r="10502" b="12955" baseLine="12950" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="12797" r="7094" b="12955">confidence</wd>

<space/>

<wd l="7186" t="12850" r="7637" b="12955">score</wd>

<space/>

<wd l="7723" t="12850" r="7891" b="12955">as</wd>

<space/>

<wd l="7978" t="12797" r="8242" b="12955">the</wd>

<space/>

<wd l="8333" t="12797" r="9163" b="12955">canonical</wd>

<space/>

<wd l="9250" t="12797" r="9672" b="12955">form</wd>

<space/>

<wd l="9754" t="12797" r="9955" b="12955">of</wd>

<space/>

<wd l="10013" t="12797" r="10502" b="12955">token</wd>

<space/>

</ln>

<ln l="6139" t="13051" r="10493" b="13253" baseLine="13205">

<wd l="6139" t="13080" r="6278" b="13238"><run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="600" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">i</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="6355" t="13061" r="6782" b="13210">How</wd>

<space/>

<wd l="6850" t="13075" r="7013" b="13210">to</wd>

<space/>

<wd l="7094" t="13075" r="7829" b="13253">generate</wd>

<space/>

<wd l="7906" t="13051" r="8822" b="13210">candidates</wd>

<space/>

<wd l="8904" t="13051" r="9216" b="13210">and</wd>

<space/>

<wd l="9283" t="13051" r="9662" b="13210">how</wd>

<space/>

<wd l="9730" t="13075" r="9888" b="13210">to</wd>

<space/>

<wd l="9970" t="13051" r="10493" b="13210">calcu-</wd>

</run>

</ln>

<ln l="6139" t="13306" r="10498" b="13507" baseLine="13459" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="13306" r="6446" b="13464">late</wd>

<space/>

<wd l="6542" t="13306" r="7493" b="13464">confidence</wd>

<space/>

<wd l="7594" t="13358" r="8126" b="13464">scores</wd>

<space/>

<wd l="8222" t="13358" r="8482" b="13464">are</wd>

<space/>

<wd l="8578" t="13306" r="9221" b="13464">learned</wd>

<space/>

<wd l="9307" t="13306" r="9730" b="13464">from</wd>

<space/>

<wd l="9816" t="13306" r="10498" b="13507">training</wd>

<space/>

</ln>

<ln l="6139" t="13555" r="6547" b="13714" baseLine="13709" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="13555" r="6547" b="13714">data.</wd>

</ln>

</para>

<para l="6139" t="13978" r="8798" b="14131" alignment="left" spaceBefore="173" lsp="exactly" lspExact="245" language="en">

<ln l="6139" t="13978" r="8798" b="14131" baseLine="14126" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="9">

<wd l="6139" t="13978" r="6394" b="14131">2.1</wd>

<space/>

<wd l="6715" t="13978" r="7685" b="14131">Candidate</wd>

<space/>

<wd l="7747" t="13978" r="8798" b="14131">Generation</wd>

</ln>

</para>

<para l="6139" t="14342" r="9298" b="14530" alignment="left" spaceBefore="116" lsp="exactly" lspExact="253" language="en">

<ln l="6139" t="14342" r="9298" b="14530" baseLine="14497">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6139" t="14342" r="6470" b="14501">The</wd>

<space/>

<wd l="6538" t="14342" r="7450" b="14501">candidates</wd>

<space/>

<wd l="7522" t="14342" r="7723" b="14501">of</wd>

<space/>

<wd l="7757" t="14395" r="7848" b="14501">a</wd>

<space/>

<wd l="7906" t="14342" r="8395" b="14501">token</wd>

<space/>

</run>

<wd l="8458" t="14371" r="8549" b="14530"><run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="600" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">i</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="600" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="8611" t="14342" r="9298" b="14501">include:</wd>

</run>

</ln>

</para>

<para l="6437" t="14611" r="8117" b="14770" alignment="left" li="288" spaceBefore="16" lsp="exactly" lspExact="249" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="6437" t="14611" r="8117" b="14770" baseLine="14765" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6437" t="14621" r="6720" b="14765">•</wd>

<tab position="6518"/>

<wd l="6720" t="14611" r="7046" b="14770">The</wd>

<space/>

<wd l="7109" t="14611" r="7598" b="14770">token</wd>

<space/>

<wd l="7661" t="14611" r="8117" b="14770">itself</wd>

</ln>

</para>

</column>

</section>

<section l="1421" t="15297" r="10507" b="16480">

<column l="1421" t="15297" r="10507" b="16480">

<para l="5809" t="15792" r="6143" b="15946" alignment="centered" spaceBefore="447" lsp="exactly" lspExact="249" language="en">

<ln l="5875" t="15792" r="6077" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="37">

<wd l="5875" t="15792" r="6077" b="15946">87</wd>

</ln>

</para>

<para l="2918" t="16133" r="8981" b="16469" alignment="centered" spaceBefore="139" lsp="exactly" lspExact="170" language="en">

<ln l="2918" t="16133" r="8981" b="16301" baseLine="16253" forcedEOF="true">

<run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2918" t="16133" r="3802" b="16296">Proceedings</wd>

<space/>

<wd l="3854" t="16133" r="4018" b="16296">of</wd>

<space/>

<wd l="4037" t="16133" r="4248" b="16262">the</wd>

<space/>

<wd l="4286" t="16138" r="4622" b="16262">ACL</wd>

<space/>

<wd l="4666" t="16133" r="5026" b="16262">2015</wd>

<space/>

<wd l="5078" t="16133" r="5779" b="16296">Workshop</wd>

<space/>

<wd l="5832" t="16176" r="6000" b="16262">on</wd>

<space/>

<wd l="6043" t="16138" r="6456" b="16296">Noisy</wd>

<space/>

<wd l="6518" t="16133" r="7627" b="16296">User-generated</wd>

<space/>

</run>

<wd l="7675" t="16138" r="7992" b="16286"><run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Text</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="8045" t="16171" r="8443" b="16301">pages</wd>

<space/>

<wd l="8506" t="16133" r="8981" b="16286">87–92,
</wd>

</run>

</ln>

<ln l="3029" t="16296" r="8870" b="16469" baseLine="16425" forcedEOF="true">

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3029" t="16301" r="3595" b="16469">Beijing,</wd>

<space/>

<wd l="3653" t="16301" r="4114" b="16454">China,</wd>

<space/>

<wd l="4166" t="16301" r="4459" b="16469">July</wd>

<space/>

<wd l="4512" t="16301" r="4723" b="16454">31,</wd>

<space/>

<wd l="4781" t="16301" r="5170" b="16430">2015.</wd>

<space/>

</run>

<wd l="5246" t="16296" r="5770" b="16469"><run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">©</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">2015</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="5822" t="16301" r="6672" b="16430">Association</wd>

<space/>

<wd l="6715" t="16301" r="6926" b="16430">for</wd>

<space/>

<wd l="6974" t="16301" r="8035" b="16469">Computational</wd>

<space/>

<wd l="8078" t="16301" r="8870" b="16469">Linguistics</wd>

</run>

</ln>

</para>

</column>

</section>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4313.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1310" marginTop="1429" marginRight="1296" marginBottom="1292" offsetX="-2" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1310" t="1429" r="10613" b="14942">

<column l="1310" t="1429" r="5904" b="14662">

<para l="1728" t="1488" r="5789" b="2198" alignment="justified" li="648" ri="72" spaceBefore="18" fli="-288" lsp="exactly" lspExact="253" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="1728" t="1488" r="5789" b="1646" baseLine="1642" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1728" t="1493" r="2006" b="1642">•</wd>

<tab position="1811"/>

<wd l="2006" t="1488" r="2280" b="1642">All</wd>

<space/>

<wd l="2395" t="1488" r="2957" b="1646">tokens</wd>

<space/>

<wd l="3077" t="1488" r="3408" b="1646">that</wd>

<space/>

<wd l="3518" t="1541" r="3773" b="1646">are</wd>

<space/>

<wd l="3893" t="1488" r="4843" b="1646">considered</wd>

<space/>

<wd l="4954" t="1488" r="5789" b="1646">canonical</wd>

<space/>

</ln>

<ln l="2011" t="1742" r="5784" b="1944" baseLine="1897">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2011" t="1742" r="2510" b="1901">forms</wd>

<space/>

<wd l="2587" t="1742" r="2789" b="1901">of</wd>

<space/>

</run>

<wd l="2832" t="1771" r="2928" b="1930"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">i</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2995" t="1742" r="3163" b="1896">in</wd>

<space/>

<wd l="3226" t="1742" r="3490" b="1901">the</wd>

<space/>

<wd l="3562" t="1742" r="4238" b="1944">training</wd>

<space/>

<wd l="4310" t="1742" r="4670" b="1901">data</wd>

<space/>

<wd l="4742" t="1742" r="5270" b="1944">(static</wd>

<space/>

<wd l="5338" t="1795" r="5784" b="1944">map-</wd>

</run>

</ln>

<ln l="2002" t="1997" r="3408" b="2198" baseLine="2150" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="2002" t="1997" r="2390" b="2198">ping</wd>

<space/>

<wd l="2458" t="1997" r="3408" b="2198">dictionary)</wd>

</ln>

</para>

<para l="1728" t="2386" r="5784" b="3096" alignment="justified" li="648" ri="72" spaceBefore="139" fli="-288" lsp="exactly" lspExact="253" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="1728" t="2386" r="5784" b="2587" baseLine="2539" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1728" t="2390" r="2006" b="2539">•</wd>

<tab position="1811"/>

<wd l="2006" t="2390" r="2160" b="2539">A</wd>

<space/>

<wd l="2232" t="2386" r="2602" b="2587">split</wd>

<space/>

<wd l="2664" t="2386" r="2995" b="2544">into</wd>

<space/>

<wd l="3062" t="2386" r="3787" b="2587">multiple</wd>

<space/>

<wd l="3859" t="2386" r="4694" b="2544">canonical</wd>

<space/>

<wd l="4762" t="2386" r="5261" b="2544">forms</wd>

<space/>

<wd l="5333" t="2386" r="5486" b="2539">if</wd>

<space/>

<wd l="5525" t="2386" r="5784" b="2544">the</wd>

<space/>

</ln>

<ln l="2006" t="2640" r="5784" b="2842" baseLine="2795">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2006" t="2640" r="2491" b="2798">token</wd>

<space/>

</run>

<wd l="2563" t="2669" r="2659" b="2827"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">i</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2726" t="2640" r="2861" b="2798">is</wd>

<space/>

<wd l="2938" t="2664" r="3216" b="2798">not</wd>

<space/>

<wd l="3288" t="2693" r="3379" b="2798">a</wd>

<space/>

<wd l="3451" t="2640" r="4282" b="2798">canonical</wd>

<space/>

<wd l="4358" t="2640" r="4776" b="2798">form</wd>

<space/>

<wd l="4848" t="2640" r="5179" b="2842">(for</wd>

<space/>

<wd l="5246" t="2693" r="5784" b="2798">exam-</wd>

</run>

</ln>

<ln l="2002" t="2890" r="5376" b="3096" baseLine="3048">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2002" t="2894" r="2318" b="3096">ple,</wd>

<space/>

<wd l="2386" t="2894" r="3624" b="3096">“loveyourcar”</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><wd l="3701" t="2890" r="3883" b="3048">→</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3960" t="2894" r="4426" b="3053">“love</wd>

<space/>

<wd l="4488" t="2947" r="4891" b="3096">your</wd>

<space/>

<wd l="4949" t="2894" r="5376" b="3096">car”)</wd>

</run>

</ln>

</para>

<para l="1728" t="3278" r="5794" b="3989" alignment="justified" li="648" ri="72" spaceBefore="134" fli="-288" lsp="exactly" lspExact="253" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="1728" t="3278" r="5794" b="3480" baseLine="3432">

<wd l="1728" t="3288" r="2011" b="3432">•</wd>

<tab position="1809"/>

<wd l="2011" t="3288" r="2582" b="3480"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Top-</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">m</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2650" t="3302" r="3077" b="3437">most</wd>

<space/>

<wd l="3144" t="3278" r="3749" b="3437">similar</wd>

<space/>

<wd l="3811" t="3278" r="4646" b="3437">canonical</wd>

<space/>

<wd l="4709" t="3278" r="5213" b="3437">forms</wd>

<space/>

<wd l="5285" t="3278" r="5794" b="3437">found</wd>

<space/>

</run>

</ln>

<ln l="2011" t="3533" r="5784" b="3734" baseLine="3686" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="2011" t="3533" r="2174" b="3686">in</wd>

<space/>

<wd l="2242" t="3533" r="2923" b="3734">training</wd>

<space/>

<wd l="3000" t="3533" r="3360" b="3691">data</wd>

<space/>

<wd l="3432" t="3533" r="3778" b="3734">(see</wd>

<space/>

<wd l="3859" t="3533" r="4776" b="3691">subsection</wd>

<space/>

<wd l="4843" t="3538" r="5112" b="3691">2.2</wd>

<space/>

<wd l="5194" t="3533" r="5448" b="3691">for</wd>

<space/>

<wd l="5515" t="3533" r="5784" b="3691">de-</wd>

</ln>

<ln l="2006" t="3787" r="4805" b="3989" baseLine="3941" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="2006" t="3787" r="2362" b="3946">tails</wd>

<space/>

<wd l="2429" t="3787" r="2630" b="3946">of</wd>

<space/>

<wd l="2674" t="3787" r="3509" b="3989">similarity</wd>

<space/>

<wd l="3562" t="3787" r="4805" b="3989">measurement)</wd>

</ln>

</para>

<para l="1430" t="4162" r="5794" b="5122" alignment="justified" li="72" ri="72" spaceBefore="121" fli="288" lsp="exactly" lspExact="252" language="en">

<ln l="1651" t="4162" r="5794" b="4363" baseLine="4315" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="4162" r="2222" b="4363">Figure</wd>

<space/>

<wd l="2352" t="4166" r="2410" b="4315">1</wd>

<space/>

<wd l="2554" t="4162" r="3082" b="4320">shows</wd>

<space/>

<wd l="3202" t="4214" r="3398" b="4320">an</wd>

<space/>

<wd l="3509" t="4162" r="4243" b="4363">example</wd>

<space/>

<wd l="4358" t="4162" r="4560" b="4320">of</wd>

<space/>

<wd l="4642" t="4162" r="5318" b="4363">training</wd>

<space/>

<wd l="5434" t="4162" r="5794" b="4320">data</wd>

<space/>

</ln>

<ln l="1430" t="4411" r="5765" b="4570" baseLine="4565" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="4411" r="1742" b="4570">and</wd>

<space/>

<wd l="1886" t="4464" r="1978" b="4570">a</wd>

<space/>

<wd l="2117" t="4464" r="2482" b="4570">new</wd>

<space/>

<wd l="2621" t="4435" r="3101" b="4570">tweet</wd>

<space/>

<wd l="3240" t="4411" r="3494" b="4570">for</wd>

<space/>

<wd l="3629" t="4411" r="4896" b="4570">normalization.</wd>

<space/>

<wd l="5050" t="4411" r="5539" b="4570">Table</wd>

<space/>

<wd l="5707" t="4416" r="5765" b="4565">1</wd>

<space/>

</ln>

<ln l="1435" t="4666" r="5789" b="4867" baseLine="4819" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1435" t="4666" r="1968" b="4824">shows</wd>

<space/>

<wd l="2054" t="4718" r="2146" b="4824">a</wd>

<space/>

<wd l="2213" t="4666" r="2856" b="4867">portion</wd>

<space/>

<wd l="2928" t="4666" r="3130" b="4824">of</wd>

<space/>

<wd l="3182" t="4666" r="3442" b="4824">the</wd>

<space/>

<wd l="3533" t="4666" r="3979" b="4824">static</wd>

<space/>

<wd l="4056" t="4666" r="4824" b="4867">mapping</wd>

<space/>

<wd l="4906" t="4666" r="5789" b="4867">dictionary</wd>

<space/>

</ln>

<ln l="1430" t="4920" r="4085" b="5122" baseLine="5074" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="4920" r="2074" b="5078">learned</wd>

<space/>

<wd l="2131" t="4920" r="2554" b="5078">from</wd>

<space/>

<wd l="2611" t="4920" r="2875" b="5078">the</wd>

<space/>

<wd l="2933" t="4920" r="3614" b="5122">training</wd>

<space/>

<wd l="3677" t="4920" r="4085" b="5078">data.</wd>

</ln>

</para>

<para l="1421" t="5170" r="5794" b="8366" alignment="justified" li="72" ri="72" spaceAfter="250" fli="288" lsp="exactly" lspExact="253" language="en">

<ln l="1651" t="5170" r="5784" b="5362" baseLine="5323" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="5179" r="1963" b="5328">For</wd>

<space/>

<wd l="2035" t="5170" r="2525" b="5328">token</wd>

<space/>

<wd l="2602" t="5174" r="2971" b="5328">“ur”</wd>

<space/>

<wd l="3058" t="5170" r="3221" b="5323">in</wd>

<space/>

<wd l="3298" t="5170" r="3562" b="5328">the</wd>

<space/>

<wd l="3643" t="5222" r="4008" b="5328">new</wd>

<space/>

<wd l="4085" t="5194" r="4603" b="5362">tweet,</wd>

<space/>

<wd l="4690" t="5170" r="4949" b="5328">the</wd>

<space/>

<wd l="5030" t="5170" r="5520" b="5328">token</wd>

<space/>

<wd l="5602" t="5170" r="5784" b="5328">it-</wd>

</ln>

<ln l="1435" t="5424" r="5784" b="5626" baseLine="5578" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1435" t="5424" r="1766" b="5582">self</wd>

<space/>

<wd l="1838" t="5424" r="1973" b="5582">is</wd>

<space/>

<wd l="2074" t="5429" r="2491" b="5582">“ur”.</wd>

<space/>

<wd l="2592" t="5424" r="2870" b="5578">All</wd>

<space/>

<wd l="2971" t="5424" r="3173" b="5582">of</wd>

<space/>

<wd l="3245" t="5424" r="3437" b="5582">its</wd>

<space/>

<wd l="3533" t="5424" r="4253" b="5626">possible</wd>

<space/>

<wd l="4354" t="5424" r="5189" b="5582">canonical</wd>

<space/>

<wd l="5285" t="5424" r="5784" b="5582">forms</wd>

<space/>

</ln>

<ln l="1421" t="5678" r="5794" b="5880" baseLine="5832" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="5702" r="2064" b="5880">present</wd>

<space/>

<wd l="2174" t="5678" r="2342" b="5832">in</wd>

<space/>

<wd l="2448" t="5678" r="2712" b="5837">the</wd>

<space/>

<wd l="2822" t="5678" r="3504" b="5880">training</wd>

<space/>

<wd l="3619" t="5678" r="3979" b="5837">data</wd>

<space/>

<wd l="4094" t="5731" r="4354" b="5837">are</wd>

<space/>

<wd l="4469" t="5683" r="4896" b="5880">“you</wd>

<space/>

<wd l="5006" t="5683" r="5362" b="5837">are”</wd>

<space/>

<wd l="5482" t="5678" r="5794" b="5837">and</wd>

<space/>

</ln>

<ln l="1430" t="5933" r="5789" b="6134" baseLine="6086">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1430" t="5938" r="2069" b="6134">“your”.</wd>

<space/>

<wd l="2184" t="5942" r="2477" b="6091">Let</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2578" t="5990" r="2726" b="6091">m</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2842" t="5990" r="2962" b="6038">=</wd>

<space/>

<wd l="3091" t="5938" r="3221" b="6125">1,</wd>

<space/>

<wd l="3331" t="5933" r="3595" b="6091">the</wd>

<space/>

<wd l="3706" t="5957" r="4133" b="6091">most</wd>

<space/>

<wd l="4243" t="5933" r="4848" b="6091">similar</wd>

<space/>

<wd l="4954" t="5933" r="5789" b="6091">canonical</wd>

<space/>

</run>

</ln>

<ln l="1430" t="6182" r="5784" b="6384" baseLine="6336" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="6182" r="1853" b="6341">form</wd>

<space/>

<wd l="1925" t="6182" r="2059" b="6341">is</wd>

<space/>

<wd l="2136" t="6187" r="2774" b="6384">“your”.</wd>

<space/>

<wd l="2856" t="6182" r="3758" b="6374">Therefore,</wd>

<space/>

<wd l="3835" t="6182" r="4099" b="6341">the</wd>

<space/>

<wd l="4176" t="6182" r="5093" b="6341">candidates</wd>

<space/>

<wd l="5170" t="6182" r="5371" b="6341">of</wd>

<space/>

<wd l="5419" t="6187" r="5784" b="6341">“ur”</wd>

<space/>

</ln>

<ln l="1430" t="6437" r="5794" b="6638" baseLine="6590" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="6437" r="2069" b="6595">include</wd>

<space/>

<wd l="2184" t="6442" r="2597" b="6629">“ur”,</wd>

<space/>

<wd l="2717" t="6442" r="3144" b="6638">“you</wd>

<space/>

<wd l="3250" t="6442" r="3605" b="6595">are”</wd>

<space/>

<wd l="3720" t="6437" r="4032" b="6595">and</wd>

<space/>

<wd l="4142" t="6442" r="4776" b="6638">“your”.</wd>

<space/>

<wd l="4891" t="6446" r="5203" b="6595">For</wd>

<space/>

<wd l="5304" t="6437" r="5794" b="6595">token</wd>

<space/>

</ln>

<ln l="1430" t="6691" r="5784" b="6883" baseLine="6845" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="6691" r="2213" b="6850">“looove”</wd>

<space/>

<wd l="2338" t="6691" r="2501" b="6845">in</wd>

<space/>

<wd l="2616" t="6691" r="2875" b="6850">the</wd>

<space/>

<wd l="2995" t="6744" r="3360" b="6850">new</wd>

<space/>

<wd l="3470" t="6715" r="3989" b="6883">tweet,</wd>

<space/>

<wd l="4114" t="6691" r="4378" b="6850">the</wd>

<space/>

<wd l="4493" t="6691" r="4982" b="6850">token</wd>

<space/>

<wd l="5098" t="6691" r="5554" b="6850">itself</wd>

<space/>

<wd l="5650" t="6691" r="5784" b="6850">is</wd>

<space/>

</ln>

<ln l="1430" t="6941" r="5794" b="7142" baseLine="7094" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="6941" r="2266" b="7099">“looove”.</wd>

<space/>

<wd l="2371" t="6950" r="2501" b="7099">It</wd>

<space/>

<wd l="2597" t="6941" r="2726" b="7099">is</wd>

<space/>

<wd l="2832" t="6941" r="3389" b="7099">absent</wd>

<space/>

<wd l="3485" t="6941" r="3648" b="7094">in</wd>

<space/>

<wd l="3739" t="6941" r="4003" b="7099">the</wd>

<space/>

<wd l="4099" t="6941" r="4776" b="7142">training</wd>

<space/>

<wd l="4877" t="6941" r="5280" b="7133">data,</wd>

<space/>

<wd l="5395" t="6994" r="5568" b="7099">so</wd>

<space/>

<wd l="5674" t="6941" r="5794" b="7099">it</wd>

<space/>

</ln>

<ln l="1430" t="7195" r="5784" b="7354" baseLine="7349" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="7195" r="1819" b="7354">does</wd>

<space/>

<wd l="1915" t="7219" r="2198" b="7354">not</wd>

<space/>

<wd l="2285" t="7195" r="2698" b="7354">have</wd>

<space/>

<wd l="2794" t="7195" r="2990" b="7354">its</wd>

<space/>

<wd l="3091" t="7248" r="3466" b="7354">own</wd>

<space/>

<wd l="3557" t="7195" r="4392" b="7354">canonical</wd>

<space/>

<wd l="4488" t="7195" r="4906" b="7354">form</wd>

<space/>

<wd l="5002" t="7195" r="5784" b="7354">available</wd>

<space/>

</ln>

<ln l="1430" t="7450" r="5784" b="7651" baseLine="7603" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="7502" r="1603" b="7608">as</wd>

<space/>

<wd l="1728" t="7450" r="2698" b="7608">candidates.</wd>

<space/>

<wd l="2822" t="7454" r="3480" b="7651">Among</wd>

<space/>

<wd l="3605" t="7450" r="3816" b="7608">all</wd>

<space/>

<wd l="3936" t="7450" r="4200" b="7608">the</wd>

<space/>

<wd l="4325" t="7450" r="5160" b="7608">canonical</wd>

<space/>

<wd l="5285" t="7450" r="5784" b="7608">forms</wd>

<space/>

</ln>

<ln l="1421" t="7699" r="5784" b="7901" baseLine="7853" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="7723" r="2064" b="7901">present</wd>

<space/>

<wd l="2131" t="7699" r="2299" b="7853">in</wd>

<space/>

<wd l="2366" t="7699" r="3043" b="7901">training</wd>

<space/>

<wd l="3120" t="7699" r="3523" b="7891">data,</wd>

<space/>

<wd l="3610" t="7699" r="4445" b="7858">canonical</wd>

<space/>

<wd l="4517" t="7699" r="4934" b="7858">form</wd>

<space/>

<wd l="5011" t="7699" r="5573" b="7858">“love”</wd>

<space/>

<wd l="5650" t="7699" r="5784" b="7858">is</wd>

<space/>

</ln>

<ln l="1426" t="7954" r="5784" b="8146" baseLine="8107" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="7978" r="1858" b="8112">most</wd>

<space/>

<wd l="1958" t="7954" r="2563" b="8112">similar</wd>

<space/>

<wd l="2654" t="7978" r="2818" b="8112">to</wd>

<space/>

<wd l="2923" t="7954" r="3758" b="8112">“looove”.</wd>

<space/>

<wd l="3869" t="7954" r="4771" b="8146">Therefore,</wd>

<space/>

<wd l="4877" t="7954" r="5141" b="8112">the</wd>

<space/>

<wd l="5246" t="7954" r="5784" b="8112">candi-</wd>

</ln>

<ln l="1430" t="8208" r="5563" b="8366" baseLine="8362" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="8208" r="1872" b="8366">dates</wd>

<space/>

<wd l="1939" t="8208" r="2141" b="8366">of</wd>

<space/>

<wd l="2174" t="8208" r="2962" b="8366">“looove”</wd>

<space/>

<wd l="3024" t="8208" r="3662" b="8366">include</wd>

<space/>

<wd l="3730" t="8208" r="4512" b="8366">“looove”</wd>

<space/>

<wd l="4579" t="8208" r="4891" b="8366">and</wd>

<space/>

<wd l="4949" t="8208" r="5563" b="8366">“love”.</wd>

</ln>

</para>

<picture l="1685" t="8669" r="4824" b="12163" alignment="left" li="375" ri="1080" spaceAfter="85">

</picture>

<para l="1426" t="12283" r="5794" b="12619" alignment="justified" li="72" ri="72" spaceAfter="170" lsp="exactly" lspExact="206" language="en">

<ln l="1426" t="12283" r="5794" b="12446" baseLine="12408" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="12283" r="1934" b="12446">Figure</wd>

<space/>

<wd l="2045" t="12283" r="2170" b="12413">1:</wd>

<space/>

<wd l="2280" t="12288" r="2506" b="12408">An</wd>

<space/>

<wd l="2606" t="12288" r="3283" b="12446">Example</wd>

<space/>

<wd l="3389" t="12283" r="3552" b="12413">of</wd>

<space/>

<wd l="3638" t="12283" r="4310" b="12446">Training</wd>

<space/>

<wd l="4411" t="12288" r="4776" b="12413">Data</wd>

<space/>

<wd l="4882" t="12288" r="5165" b="12413">and</wd>

<space/>

<wd l="5270" t="12322" r="5352" b="12413">a</wd>

<space/>

<wd l="5453" t="12288" r="5794" b="12413">New</wd>

<space/>

</ln>

<ln l="1430" t="12490" r="3322" b="12619" baseLine="12614" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="12494" r="1896" b="12619">Tweet</wd>

<space/>

<wd l="1944" t="12490" r="2170" b="12619">for</wd>

<space/>

<wd l="2218" t="12490" r="3322" b="12619">Normalization</wd>

</ln>

</para>

<table l="1310" t="12854" r="5904" b="14246" alignment="left" spaceBefore="14" spaceAfter="14">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<gridTable>

<gridCol>2237</gridCol>

<gridCol>2357</gridCol>

<gridRow>226</gridRow>

<gridRow>192</gridRow>

<gridRow>192</gridRow>

<gridRow>196</gridRow>

<gridRow>192</gridRow>

<gridRow>197</gridRow>

<gridRow>197</gridRow>

</gridTable>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="1426" t="12907" r="2323" b="13070" alignment="left" li="116" lsp="exactly" lspExact="199" language="en">

<ln l="1426" t="12907" r="2323" b="13070" baseLine="13032" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="12912" r="1738" b="13070">Key</wd>

<space/>

<wd l="1786" t="12907" r="2323" b="13070">(token)</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="3658" t="12907" r="5491" b="13070" alignment="left" li="111" lsp="exactly" lspExact="199" language="en">

<ln l="3658" t="12907" r="5491" b="13070" baseLine="13032" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="3658" t="12912" r="4104" b="13037">Value</wd>

<space/>

<wd l="4157" t="12907" r="4944" b="13070">(canonical</wd>

<space/>

<wd l="4992" t="12907" r="5491" b="13070">forms)</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="1430" t="13123" r="1699" b="13238" alignment="left" li="116" lsp="exactly" lspExact="182" language="en">

<ln l="1430" t="13123" r="1699" b="13238" baseLine="13234" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="13123" r="1699" b="13238">“ur”</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="3662" t="13123" r="4786" b="13272" alignment="left" li="111" lsp="exactly" lspExact="182" language="en">

<ln l="3662" t="13123" r="4786" b="13272" baseLine="13234" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="3662" t="13123" r="4128" b="13272">“your”,</wd>

<space/>

<wd l="4181" t="13123" r="4488" b="13272">“you</wd>

<space/>

<wd l="4531" t="13123" r="4786" b="13238">are”</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="1430" t="13272" r="1709" b="13430" alignment="left" li="116" lsp="exactly" lspExact="192" language="en">

<ln l="1430" t="13253" r="1709" b="13430" baseLine="13426" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="13315" r="1709" b="13430">“so”</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="3662" t="13272" r="3941" b="13430" alignment="left" li="111" lsp="exactly" lspExact="192" language="en">

<ln l="3662" t="13253" r="3941" b="13430" baseLine="13426" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="3662" t="13315" r="3941" b="13430">“so”</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="1430" t="13512" r="1920" b="13627" alignment="left" li="116" lsp="exactly" lspExact="184" language="en">

<ln l="1430" t="13512" r="1920" b="13627" baseLine="13622" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="13512" r="1920" b="13627">“niiice”</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="3662" t="13512" r="4061" b="13627" alignment="left" li="111" lsp="exactly" lspExact="184" language="en">

<ln l="3662" t="13512" r="4061" b="13627" baseLine="13622" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="3662" t="13512" r="4061" b="13627">“nice”</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="1430" t="13704" r="1771" b="13819" alignment="left" li="116" lsp="exactly" lspExact="178" language="en">

<ln l="1430" t="13704" r="1771" b="13819" baseLine="13814" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="13704" r="1771" b="13819">“luv”</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="3662" t="13704" r="4070" b="13819" alignment="left" li="111" lsp="exactly" lspExact="178" language="en">

<ln l="3662" t="13704" r="4070" b="13819" baseLine="13814" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="3662" t="13704" r="4070" b="13819">“love”</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="5" gridRowTill="5" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="1430" t="13852" r="1762" b="14011" alignment="left" li="116" lsp="exactly" lspExact="188" language="en">

<ln l="1430" t="13834" r="1762" b="14011" baseLine="14006" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="13896" r="1762" b="14011">“car”</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="5" gridRowTill="5" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="3662" t="13852" r="3994" b="14011" alignment="left" li="111" lsp="exactly" lspExact="188" language="en">

<ln l="3662" t="13834" r="3994" b="14011" baseLine="14006" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="3662" t="13896" r="3994" b="14011">“car”</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="6" gridRowTill="6" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="1430" t="14093" r="2064" b="14208" alignment="left" li="116" lsp="exactly" lspExact="192" language="en">

<ln l="1430" t="14093" r="2064" b="14208" baseLine="14203" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="14093" r="2064" b="14208">“welcme”</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="6" gridRowTill="6" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="3662" t="14093" r="4373" b="14208" alignment="left" li="111" lsp="exactly" lspExact="192" language="en">

<ln l="3662" t="14093" r="4373" b="14208" baseLine="14203" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="3662" t="14093" r="4373" b="14208">“welcome”</wd>

</ln>

</para>

</cell>

</table>

<para l="1430" t="14290" r="5270" b="14654" alignment="left" li="72" ri="648" lsp="exactly" lspExact="200" language="en">

<ln l="1430" t="14290" r="5270" b="14453" baseLine="14414" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="14294" r="1862" b="14419">Table</wd>

<space/>

<wd l="1920" t="14294" r="2050" b="14419">1:</wd>

<space/>

<wd l="2112" t="14290" r="2544" b="14419">Static</wd>

<space/>

<wd l="2592" t="14290" r="3288" b="14453">Mapping</wd>

<space/>

<wd l="3336" t="14290" r="4157" b="14453">Dictionary</wd>

<space/>

<wd l="4200" t="14294" r="4848" b="14419">Learned</wd>

<space/>

<wd l="4901" t="14290" r="5270" b="14419">from</wd>

<space/>

</ln>

<ln l="1430" t="14491" r="2520" b="14654" baseLine="14616" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="14491" r="2102" b="14654">Training</wd>

<space/>

<wd l="2150" t="14496" r="2520" b="14621">Data</wd>

</ln>

</para>

</column>

<column l="6019" t="1429" r="10613" b="14942">

<para l="6139" t="1478" r="8242" b="1675" alignment="left" li="72" ri="72" spaceBefore="10" lsp="exactly" lspExact="245" language="en">

<ln l="6139" t="1478" r="8242" b="1675" baseLine="1627" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="11">

<wd l="6139" t="1478" r="6403" b="1632">2.2</wd>

<space/>

<wd l="6720" t="1478" r="7651" b="1675">Similarity</wd>

<space/>

<wd l="7709" t="1483" r="8242" b="1632">Index</wd>

</ln>

</para>

<para l="6134" t="1848" r="10502" b="3062" alignment="justified" li="72" ri="72" spaceBefore="121" lsp="exactly" lspExact="253" language="en">

<ln l="6134" t="1848" r="10502" b="2050" baseLine="2002" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="1858" r="6432" b="2006">We</wd>

<space/>

<wd l="6542" t="1901" r="7267" b="2006">measure</wd>

<space/>

<wd l="7387" t="1848" r="8222" b="2050">similarity</wd>

<space/>

<wd l="8318" t="1848" r="9058" b="2006">between</wd>

<space/>

<wd l="9158" t="1872" r="9480" b="2006">two</wd>

<space/>

<wd l="9600" t="1848" r="10171" b="2050">strings</wd>

<space/>

<wd l="10277" t="1848" r="10502" b="2050">by</wd>

<space/>

</ln>

<ln l="6139" t="2102" r="10493" b="2304" baseLine="2256" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="2102" r="6490" b="2261">first</wd>

<space/>

<wd l="6571" t="2102" r="7656" b="2304">representing</wd>

<space/>

<wd l="7742" t="2102" r="8141" b="2261">each</wd>

<space/>

<wd l="8232" t="2102" r="8722" b="2304">string</wd>

<space/>

<wd l="8808" t="2102" r="9197" b="2261">with</wd>

<space/>

<wd l="9283" t="2155" r="9374" b="2261">a</wd>

<space/>

<wd l="9466" t="2126" r="9706" b="2261">set</wd>

<space/>

<wd l="9787" t="2102" r="9989" b="2261">of</wd>

<space/>

<wd l="10056" t="2102" r="10493" b="2261">simi-</wd>

</ln>

<ln l="6139" t="2357" r="10502" b="2558" baseLine="2510" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="2357" r="6600" b="2558">larity</wd>

<space/>

<wd l="6662" t="2357" r="7349" b="2515">features</wd>

<space/>

<wd l="7421" t="2357" r="7733" b="2515">and</wd>

<space/>

<wd l="7790" t="2357" r="8170" b="2515">then</wd>

<space/>

<wd l="8237" t="2357" r="9144" b="2558">evaluating</wd>

<space/>

<wd l="9216" t="2357" r="10051" b="2558">similarity</wd>

<space/>

<wd l="10109" t="2357" r="10502" b="2515">with</wd>

<space/>

</ln>

<ln l="6134" t="2606" r="10493" b="2808" baseLine="2760" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="2606" r="6797" b="2765">Jaccard</wd>

<space/>

<wd l="6874" t="2606" r="7363" b="2765">Index</wd>

<space/>

<wd l="7445" t="2606" r="8746" b="2808">(Levandowsky</wd>

<space/>

<wd l="8822" t="2630" r="8981" b="2765">et</wd>

<space/>

<wd l="9053" t="2606" r="9307" b="2798">al.,</wd>

<space/>

<wd l="9413" t="2606" r="9898" b="2808">1971)</wd>

<space/>

<wd l="9979" t="2606" r="10181" b="2765">of</wd>

<space/>

<wd l="10234" t="2606" r="10493" b="2765">the</wd>

<space/>

</ln>

<ln l="6134" t="2861" r="8458" b="3062" baseLine="3014" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="2885" r="6456" b="3019">two</wd>

<space/>

<wd l="6528" t="2861" r="7363" b="3062">similarity</wd>

<space/>

<wd l="7426" t="2861" r="8021" b="3019">feature</wd>

<space/>

<wd l="8093" t="2885" r="8458" b="3019">sets.</wd>

</ln>

</para>

<para l="6130" t="3115" r="10502" b="6341" alignment="justified" li="72" ri="72" spaceAfter="221" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="6365" t="3115" r="10493" b="3317" baseLine="3269">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6365" t="3115" r="6696" b="3274">The</wd>

<space/>

<wd l="6782" t="3115" r="7618" b="3317">similarity</wd>

<space/>

<wd l="7694" t="3115" r="8376" b="3274">features</wd>

<space/>

<wd l="8458" t="3115" r="8659" b="3274">of</wd>

<space/>

<wd l="8712" t="3168" r="8803" b="3274">a</wd>

<space/>

<wd l="8885" t="3115" r="9370" b="3317">string</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="9442" t="3173" r="9528" b="3274">s</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="9605" t="3115" r="10238" b="3274">include</wd>

<space/>

</run>

<wd l="10315" t="3173" r="10493" b="3274"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">n</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">-</run>

</wd>

</ln>

<ln l="6139" t="3365" r="10502" b="3566" baseLine="3518">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6139" t="3418" r="6662" b="3566">grams</wd>

<space/>

<wd l="6754" t="3365" r="7066" b="3523">and</wd>

<space/>

</run>

<wd l="7142" t="3365" r="8467" b="3566"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">k</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">-skip-</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">n</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">-grams</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="8558" t="3365" r="8726" b="3518">in</wd>

<space/>

</run>

<wd l="8794" t="3422" r="8928" b="3523"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">s</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="9024" t="3374" r="9202" b="3518">In</wd>

<space/>

<wd l="9278" t="3365" r="9586" b="3523">this</wd>

<space/>

<wd l="9667" t="3418" r="10205" b="3566">paper,</wd>

<space/>

<wd l="10296" t="3418" r="10502" b="3523">an</wd>

<space/>

</run>

</ln>

<ln l="6134" t="3619" r="10493" b="3821" baseLine="3773">

<wd l="6134" t="3672" r="6768" b="3821"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">n</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">-gram</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="6850" t="3619" r="7018" b="3773">in</wd>

<space/>

<wd l="7099" t="3619" r="7584" b="3821">string</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7661" t="3677" r="7747" b="3778">s</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7829" t="3619" r="7958" b="3778">is</wd>

<space/>

<wd l="8050" t="3619" r="8702" b="3778">defined</wd>

<space/>

<wd l="8779" t="3672" r="8952" b="3778">as</wd>

<space/>

<wd l="9038" t="3672" r="9130" b="3778">a</wd>

<space/>

<wd l="9211" t="3619" r="10162" b="3821">contiguous</wd>

<space/>

<wd l="10253" t="3672" r="10493" b="3778">se-</wd>

</run>

</ln>

<ln l="6139" t="3874" r="10502" b="4075" baseLine="4027">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6139" t="3926" r="6754" b="4075">quence</wd>

<space/>

<wd l="6850" t="3874" r="7051" b="4032">of</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7114" t="3931" r="7214" b="4032">n</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7301" t="3874" r="8179" b="4032">characters</wd>

<space/>

<wd l="8275" t="3874" r="8443" b="4027">in</wd>

<space/>

</run>

<wd l="8525" t="3931" r="8654" b="4032"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">s</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="8755" t="3878" r="8909" b="4027">A</wd>

<space/>

</run>

<wd l="9000" t="3874" r="10243" b="4075"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">k</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">-skip-</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">n</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">-gram</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="10334" t="3874" r="10502" b="4027">in</wd>

<space/>

</run>

</ln>

<ln l="6144" t="4123" r="10493" b="4325" baseLine="4277">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6144" t="4123" r="6634" b="4325">string</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6715" t="4181" r="6802" b="4282">s</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6898" t="4123" r="7027" b="4282">is</wd>

<space/>

<wd l="7128" t="4176" r="7219" b="4282">a</wd>

<space/>

<wd l="7310" t="4123" r="8549" b="4325">generalization</wd>

<space/>

<wd l="8640" t="4123" r="8842" b="4282">of</wd>

<space/>

</run>

<wd l="8904" t="4176" r="9533" b="4325"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">n</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">-gram</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="9619" t="4123" r="10013" b="4282">with</wd>

<space/>

<wd l="10099" t="4176" r="10493" b="4325">gaps</wd>

<space/>

</run>

</ln>

<ln l="6130" t="4378" r="10493" b="4579" baseLine="4531" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="4378" r="6869" b="4536">between</wd>

<space/>

<wd l="6950" t="4378" r="7829" b="4536">characters</wd>

<space/>

<wd l="7915" t="4378" r="8227" b="4536">and</wd>

<space/>

<wd l="8309" t="4378" r="8443" b="4536">is</wd>

<space/>

<wd l="8530" t="4378" r="9187" b="4536">defined</wd>

<space/>

<wd l="9269" t="4430" r="9437" b="4536">as</wd>

<space/>

<wd l="9528" t="4430" r="9619" b="4536">a</wd>

<space/>

<wd l="9706" t="4430" r="10493" b="4579">sequence</wd>

<space/>

</ln>

<ln l="6139" t="4632" r="10493" b="4790" baseLine="4786">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6139" t="4632" r="6341" b="4790">of</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6389" t="4690" r="6490" b="4790">n</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6562" t="4632" r="7440" b="4790">characters</wd>

<space/>

<wd l="7517" t="4632" r="8050" b="4790">where</wd>

<space/>

<wd l="8126" t="4632" r="8390" b="4790">the</wd>

<space/>

<wd l="8467" t="4632" r="9355" b="4790">maximum</wd>

<space/>

<wd l="9432" t="4632" r="10142" b="4790">distance</wd>

<space/>

<wd l="10214" t="4632" r="10493" b="4790">be-</wd>

</run>

</ln>

<ln l="6134" t="4886" r="10493" b="5088" baseLine="5040">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6134" t="4910" r="6662" b="5045">tween</wd>

<space/>

<wd l="6734" t="4910" r="7056" b="5045">two</wd>

<space/>

<wd l="7142" t="4886" r="8021" b="5045">characters</wd>

<space/>

<wd l="8112" t="4886" r="8242" b="5045">is</wd>

<space/>

</run>

<wd l="8328" t="4886" r="8467" b="5045"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">k</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="8554" t="4896" r="8856" b="5045">We</wd>

<space/>

<wd l="8928" t="4886" r="9643" b="5088">prepend</wd>

<space/>

<wd l="9725" t="4886" r="10493" b="5088">(append)</wd>

<space/>

</run>

</ln>

<ln l="6139" t="5131" r="10498" b="5338" baseLine="5290">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6139" t="5189" r="6230" b="5294">a</wd>

<space/>

<wd l="6346" t="5131" r="6638" b="5309">“$”</wd>

<space/>

<wd l="6749" t="5160" r="6912" b="5294">to</wd>

<space/>

</run>

<wd l="7027" t="5189" r="7738" b="5338"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">n</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">-grams</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="7853" t="5136" r="8184" b="5294">that</wd>

<space/>

<wd l="8294" t="5189" r="8880" b="5338">appear</wd>

<space/>

<wd l="8986" t="5160" r="9144" b="5294">at</wd>

<space/>

<wd l="9245" t="5136" r="9509" b="5294">the</wd>

<space/>

<wd l="9614" t="5136" r="10498" b="5338">beginning</wd>

<space/>

</run>

</ln>

<ln l="6139" t="5390" r="10502" b="5592" baseLine="5544" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="5390" r="6595" b="5592">(end)</wd>

<space/>

<wd l="6667" t="5390" r="6869" b="5549">of</wd>

<space/>

<wd l="6907" t="5390" r="7166" b="5549">the</wd>

<space/>

<wd l="7243" t="5390" r="7781" b="5592">string.</wd>

<space/>

<wd l="7853" t="5400" r="8150" b="5549">We</wd>

<space/>

<wd l="8218" t="5443" r="8506" b="5549">use</wd>

<space/>

<wd l="8578" t="5390" r="8808" b="5592">“|”</wd>

<space/>

<wd l="8875" t="5414" r="9038" b="5549">to</wd>

<space/>

<wd l="9110" t="5390" r="9797" b="5549">indicate</wd>

<space/>

<wd l="9869" t="5443" r="10258" b="5592">gaps</wd>

<space/>

<wd l="10334" t="5390" r="10502" b="5544">in</wd>

<space/>

</ln>

<ln l="6144" t="5645" r="10493" b="5846" baseLine="5798" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="5645" r="7157" b="5846">skip-grams.</wd>

<space/>

<wd l="7224" t="5654" r="7536" b="5803">For</wd>

<space/>

<wd l="7594" t="5645" r="8376" b="5846">example,</wd>

<space/>

<wd l="8453" t="5645" r="8942" b="5803">Table</wd>

<space/>

<wd l="9005" t="5650" r="9106" b="5798">2</wd>

<space/>

<wd l="9182" t="5645" r="9715" b="5803">shows</wd>

<space/>

<wd l="9782" t="5645" r="10046" b="5803">the</wd>

<space/>

<wd l="10118" t="5645" r="10493" b="5803">sim-</wd>

</ln>

<ln l="6139" t="5894" r="10498" b="6096" baseLine="6048" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="5894" r="6662" b="6096">ilarity</wd>

<space/>

<wd l="6730" t="5894" r="7330" b="6053">feature</wd>

<space/>

<wd l="7411" t="5918" r="7723" b="6053">sets</wd>

<space/>

<wd l="7800" t="5894" r="8002" b="6053">of</wd>

<space/>

<wd l="8050" t="5894" r="8664" b="6086">“love”,</wd>

<space/>

<wd l="8746" t="5894" r="9576" b="6086">“looove”,</wd>

<space/>

<wd l="9658" t="5899" r="10109" b="6053">“car”</wd>

<space/>

<wd l="10186" t="5894" r="10498" b="6053">and</wd>

<space/>

</ln>

<ln l="6139" t="6149" r="8290" b="6341" baseLine="6302">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6139" t="6154" r="6629" b="6341">“cat”,</wd>

<space/>

<wd l="6696" t="6149" r="7090" b="6307">with</wd>

<space/>

</run>

<wd l="7142" t="6154" r="7478" b="6307"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">n</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">=2</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="7546" t="6149" r="7862" b="6307">and</wd>

<space/>

</run>

<wd l="7915" t="6149" r="8290" b="6307"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">k</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">=1.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

<table l="6019" t="6605" r="10613" b="7608" alignment="left" spaceBefore="14" spaceAfter="14">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<gridTable>

<gridCol>835</gridCol>

<gridCol>3759</gridCol>

<gridRow>226</gridRow>

<gridRow>192</gridRow>

<gridRow>196</gridRow>

<gridRow>192</gridRow>

<gridRow>197</gridRow>

</gridTable>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6139" t="6662" r="6610" b="6826" alignment="left" li="120" lsp="exactly" lspExact="194" language="en">

<ln l="6139" t="6662" r="6610" b="6826" baseLine="6787" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6139" t="6662" r="6610" b="6826">String</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6965" t="6662" r="8659" b="6826" alignment="left" li="111" lsp="exactly" lspExact="194" language="en">

<ln l="6965" t="6662" r="8659" b="6826" baseLine="6787" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6965" t="6662" r="7733" b="6826">Similarity</wd>

<space/>

<wd l="7776" t="6667" r="8371" b="6792">Feature</wd>

<space/>

<wd l="8424" t="6662" r="8659" b="6792">Set</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6139" t="6869" r="6547" b="6989" alignment="left" li="120" lsp="exactly" lspExact="181" language="en">

<ln l="6139" t="6869" r="6547" b="6989" baseLine="6984" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6139" t="6874" r="6547" b="6989">“love”</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6965" t="6831" r="8923" b="7022" alignment="left" li="111" lsp="exactly" lspExact="181" language="en">

<ln l="6965" t="6787" r="8923" b="7022" baseLine="6984" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6965" t="6869" r="7339" b="7013">“$lo”,</wd>

<space/>

<wd l="7392" t="6874" r="7723" b="7013">“ov”,</wd>

<space/>

<wd l="7776" t="6869" r="8174" b="7013">“ve$”,</wd>

<space/>

<wd l="8227" t="6874" r="8554" b="7022">“l|v”,</wd>

<space/>

<wd l="8606" t="6874" r="8923" b="7022">“o|e”</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6139" t="7061" r="6710" b="7181" alignment="left" li="120" lsp="exactly" lspExact="191" language="en">

<ln l="6139" t="7061" r="6710" b="7181" baseLine="7176" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="7066" r="6710" b="7181">“looove”</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6965" t="7023" r="10133" b="7214" alignment="left" li="111" lsp="exactly" lspExact="191" language="en">

<ln l="6965" t="6979" r="10133" b="7214" baseLine="7176" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6965" t="7061" r="7339" b="7205">“$lo”,</wd>

<space/>

<wd l="7392" t="7066" r="7723" b="7205">“oo”,</wd>

<space/>

<wd l="7776" t="7066" r="8107" b="7205">“ov”,</wd>

<space/>

<wd l="8155" t="7061" r="8558" b="7205">“ve$”,</wd>

<space/>

<wd l="8611" t="7066" r="8938" b="7214">“l|o”,</wd>

<space/>

<wd l="8986" t="7066" r="9350" b="7214">“o|o”,</wd>

<space/>

<wd l="9403" t="7066" r="9763" b="7214">“o|v”,</wd>

<space/>

<wd l="9816" t="7066" r="10133" b="7214">“o|e”</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6139" t="7219" r="6470" b="7378" alignment="left" li="120" lsp="exactly" lspExact="182" language="en">

<ln l="6139" t="7190" r="6470" b="7378" baseLine="7373" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6139" t="7262" r="6470" b="7378">“car”</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6965" t="7219" r="8126" b="7411" alignment="left" li="111" lsp="exactly" lspExact="182" language="en">

<ln l="6965" t="7190" r="8126" b="7411" baseLine="7373" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6965" t="7258" r="7358" b="7402">“$ca”,</wd>

<space/>

<wd l="7411" t="7258" r="7786" b="7402">“ar$”,</wd>

<space/>

<wd l="7834" t="7262" r="8126" b="7411">“c|r”</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6139" t="7454" r="6461" b="7570" alignment="left" li="120" lsp="exactly" lspExact="192" language="en">

<ln l="6139" t="7454" r="6461" b="7570" baseLine="7565" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6139" t="7454" r="6461" b="7570">“cat”</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6965" t="7450" r="8107" b="7603" alignment="left" li="111" lsp="exactly" lspExact="192" language="en">

<ln l="6965" t="7450" r="8107" b="7603" baseLine="7565" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6965" t="7450" r="7358" b="7594">“$ca”,</wd>

<space/>

<wd l="7411" t="7450" r="7776" b="7594">“at$”,</wd>

<space/>

<wd l="7829" t="7454" r="8107" b="7603">“c|t”</wd>

</ln>

</para>

</cell>

</table>

<para l="6139" t="7646" r="10320" b="7810" alignment="left" li="72" ri="72" spaceAfter="202" lsp="exactly" lspExact="196" language="en">

<ln l="6139" t="7646" r="10320" b="7810" baseLine="7771">

<run bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6139" t="7651" r="6571" b="7776">Table</wd>

<space/>

<wd l="6619" t="7651" r="6758" b="7776">2:</wd>

<space/>

<wd l="6816" t="7651" r="7042" b="7771">An</wd>

<space/>

<wd l="7090" t="7651" r="7766" b="7810">Example</wd>

<space/>

<wd l="7819" t="7646" r="7982" b="7776">of</wd>

<space/>

<wd l="8016" t="7646" r="8779" b="7810">Similarity</wd>

<space/>

<wd l="8827" t="7651" r="9490" b="7776">Features</wd>

<space/>

</run>

<wd l="9542" t="7646" r="9936" b="7810"><run bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run bold="true" italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">n</run>

<run bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">=2,</run>

</wd>

<run bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="9984" t="7646" r="10320" b="7810"><run bold="true" italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">k</run>

<run bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">=1)</run>

</wd>

<run bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

<para l="6096" t="8064" r="10493" b="8774" alignment="justified" li="72" ri="72" fli="216" lsp="exactly" lspExact="177" language="en">

<ln l="6360" t="8064" r="10493" b="8266" baseLine="8218">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6360" t="8074" r="6658" b="8222">Let</wd>

<space/>

<wd l="6744" t="8064" r="7008" b="8222">the</wd>

<space/>

<wd l="7114" t="8064" r="7949" b="8266">similarity</wd>

<space/>

<wd l="8040" t="8064" r="8640" b="8222">feature</wd>

<space/>

<wd l="8746" t="8088" r="8981" b="8222">set</wd>

<space/>

<wd l="9072" t="8064" r="9274" b="8222">of</wd>

<space/>

<wd l="9346" t="8117" r="9437" b="8222">a</wd>

<space/>

<wd l="9538" t="8064" r="10027" b="8266">string</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="10114" t="8122" r="10200" b="8222">s</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="10286" t="8064" r="10493" b="8222">be</wd>

<space/>

</run>

</ln>

<ln l="6096" t="8318" r="10493" b="8520" baseLine="8473">

<wd l="6096" t="8318" r="6470" b="8520"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">f</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">s</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">),</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="6542" t="8318" r="6922" b="8477">then</wd>

<space/>

<wd l="6979" t="8371" r="7229" b="8477">we</wd>

<space/>

<wd l="7296" t="8371" r="8026" b="8477">measure</wd>

<space/>

<wd l="8098" t="8318" r="8587" b="8520">string</wd>

<space/>

<wd l="8659" t="8318" r="9494" b="8520">similarity</wd>

<space/>

<wd l="9547" t="8318" r="10286" b="8477">between</wd>

<space/>

</run>

<wd l="10339" t="8376" r="10493" b="8501"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">s</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">1</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

</ln>

<ln l="6139" t="8573" r="6984" b="8774" baseLine="8731">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6139" t="8573" r="6451" b="8731">and</wd>

<space/>

</run>

<wd l="6504" t="8630" r="6662" b="8755"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">s</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">2</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6715" t="8573" r="6984" b="8774">by:</wd>

</run>

</ln>

</para>

<para l="6178" t="8827" r="7723" b="9029" alignment="left" li="72" ri="72" lsp="exactly" lspExact="422" language="en" id="_1_2_184">

<ln l="6178" t="8827" r="7723" b="9029" baseLine="8987">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="13"><wd l="6178" t="8827" r="7171" b="9029">similarity</wd>

<space/>

</run>

<wd l="7277" t="8875" r="7502" b="9024"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="13">𝑠</run>

<run underlined="none" subsuperscript="none" fontSize="800" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="13">1</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="13">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="13"><space/>

</run>

<wd l="7550" t="8875" r="7723" b="9024"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="13">𝑠</run>

<run underlined="none" subsuperscript="none" fontSize="800" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="13">2</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="800" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="13"><nl orig="true"/>

</run>

</ln>

</para>

<para l="7627" t="9106" r="10253" b="9307" alignment="left" li="1584" ri="72" lsp="exactly" lspExact="363" language="en" id="_1_2_185">

<ln l="7627" t="9106" r="10253" b="9307" baseLine="9264">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="21"><wd l="7627" t="9168" r="7762" b="9230">=</wd>

<space/>

<wd l="7814" t="9106" r="9163" b="9302">Jaccardindex</wd>

<space/>

<wd l="9264" t="9106" r="9389" b="9307">𝑓</wd>

<space/>

</run>

<wd l="9494" t="9154" r="9662" b="9302"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="21">𝑠</run>

<run underlined="none" subsuperscript="none" fontSize="800" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="21">1</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="800" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="21"><space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="21"><wd l="9773" t="9230" r="9811" b="9293">,</wd>

<space/>

<wd l="9850" t="9106" r="9974" b="9307">𝑓</wd>

<space/>

</run>

<wd l="10080" t="9154" r="10253" b="9302"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="21">𝑠</run>

<run underlined="none" subsuperscript="none" fontSize="800" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="21">2</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="800" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="21"><nl orig="true"/>

</run>

</ln>

</para>

<para l="7862" t="9355" r="9211" b="9562" alignment="centered" li="72" ri="72" lsp="exactly" lspExact="303" language="en">

<ln l="7862" t="9355" r="9211" b="9562" baseLine="9518">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="11"><wd l="7862" t="9360" r="8030" b="9562">|𝑓</wd>

<space/>

</run>

<wd l="8050" t="9360" r="8390" b="9562"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="11">(𝑠</run>

<run underlined="none" subsuperscript="subscript" fontSize="1100" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="11">1</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="11">)</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="11"><space/>

<wd l="8472" t="9384" r="8592" b="9514">∩</wd>

<space/>

</run>

<wd l="8654" t="9360" r="9211" b="9562"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="11">𝑓(𝑠</run>

<run underlined="none" subsuperscript="subscript" fontSize="1100" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="11">2</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="11">)|</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="11"><nl orig="true"/>

</run>

</ln>

</para>

<para l="7862" t="9634" r="9211" b="9878" alignment="centered" li="72" ri="72" lsp="exactly" lspExact="399" language="en">

<ln l="7862" t="9634" r="9211" b="9878" baseLine="9834">

<wd l="7862" t="9677" r="8390" b="9878"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="16">|𝑓(𝑠</run>

<run underlined="none" subsuperscript="none" fontSize="800" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="16">1</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="16">)</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="16"><space/>

<wd l="8472" t="9706" r="8592" b="9835">∪</wd>

<space/>

</run>

<wd l="8654" t="9677" r="9211" b="9878"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="16">𝑓(𝑠</run>

<run underlined="none" subsuperscript="none" fontSize="800" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="16">2</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="16">)|</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Cambria Math" fontFamily="roman" fontPitch="variable" spacing="16"><nl orig="true"/>

</run>

</ln>

</para>

<para l="6139" t="9931" r="10493" b="10637" alignment="justified" li="72" ri="72" fli="216" lsp="exactly" lspExact="195" language="en">

<ln l="6360" t="9931" r="10493" b="10133" baseLine="10085" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6360" t="9941" r="6672" b="10090">For</wd>

<space/>

<wd l="6744" t="9931" r="7531" b="10133">example,</wd>

<space/>

<wd l="7618" t="9931" r="7786" b="10085">in</wd>

<space/>

<wd l="7862" t="9931" r="8352" b="10090">Table</wd>

<space/>

<wd l="8429" t="9936" r="8582" b="10123">2,</wd>

<space/>

<wd l="8674" t="9931" r="9235" b="10090">“love”</wd>

<space/>

<wd l="9322" t="9931" r="9634" b="10090">and</wd>

<space/>

<wd l="9710" t="9931" r="10493" b="10090">“looove”</wd>

<space/>

</ln>

<ln l="6144" t="10176" r="10488" b="10382" baseLine="10334" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="10181" r="6595" b="10339">share</wd>

<space/>

<wd l="6744" t="10181" r="7579" b="10382">similarity</wd>

<space/>

<wd l="7723" t="10181" r="8405" b="10339">features</wd>

<space/>

<wd l="8578" t="10176" r="9178" b="10382">{“$lo”,</wd>

<space/>

<wd l="9331" t="10186" r="9782" b="10373">“ov”,</wd>

<space/>

<wd l="9936" t="10176" r="10488" b="10373">“ve$”,</wd>

<space/>

</ln>

<ln l="6139" t="10435" r="10493" b="10637" baseLine="10589" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="10435" r="6730" b="10637">“o|e”}.</wd>

<space/>

<wd l="6826" t="10435" r="7157" b="10594">The</wd>

<space/>

<wd l="7238" t="10435" r="7742" b="10594">union</wd>

<space/>

<wd l="7824" t="10435" r="8026" b="10594">of</wd>

<space/>

<wd l="8078" t="10435" r="8486" b="10594">their</wd>

<space/>

<wd l="8573" t="10435" r="9403" b="10637">similarity</wd>

<space/>

<wd l="9485" t="10435" r="10085" b="10594">feature</wd>

<space/>

<wd l="10181" t="10459" r="10493" b="10594">sets</wd>

</ln>

</para>

<para l="6139" t="10608" r="10502" b="11352" alignment="justified" li="72" ri="72" lsp="exactly" lspExact="253" language="en">

<tabs position="6139"/>

<ln l="6139" t="10608" r="10488" b="10891" baseLine="10843" forcedEOF="true">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-8"><wd l="6139" t="10742" r="6274" b="10848">is</wd>

<space/>

<wd l="6398" t="10685" r="6998" b="10891">{“$lo”,</wd>

<space/>

<wd l="7104" t="10694" r="7555" b="10882">“oo”,</wd>

<space/>

<wd l="7661" t="10694" r="8117" b="10882">“ov”,</wd>

<space/>

<wd l="8222" t="10690" r="8774" b="10891">“ve$”,</wd>

<space/>

<wd l="8880" t="10690" r="9331" b="10891">“l|v”,</wd>

<space/>

<wd l="9437" t="10690" r="9883" b="10891">“l|o”,</wd>

<space/>

<wd l="9989" t="10690" r="10488" b="10891">“o|o”,</wd>

</run>

</ln>

<ln l="6139" t="10762" r="10502" b="11141" baseLine="11056" forcedEOF="true">

<wd l="6139" t="10776" r="6677" b="11141"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-8">“o|v”,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-8"><space/>

<wd l="6826" t="10939" r="7416" b="11141">“o|e”}.</wd>

<space/>

<wd l="7608" t="10939" r="7939" b="11098">The</wd>

<space/>

</run>

<wd l="8126" t="10766" r="9029" b="11141"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-8">similarity</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-8"><space/>

</run>

<wd l="9139" t="10766" r="9590" b="11098"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-8">score</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-8"><space/>

<wd l="9763" t="10939" r="10502" b="11098">between</wd>

<space/>

</run>

</ln>

<ln l="6139" t="11194" r="9130" b="11352" baseLine="11347" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-8" forcedEOF="true">

<wd l="6139" t="11194" r="6701" b="11352">“love”</wd>

<space/>

<wd l="6768" t="11194" r="7080" b="11352">and</wd>

<space/>

<wd l="7142" t="11194" r="7925" b="11352">“looove”</wd>

<space/>

<wd l="7992" t="11194" r="8122" b="11352">is</wd>

<space/>

<wd l="8189" t="11194" r="8458" b="11352">4/9</wd>

<space/>

<wd l="8525" t="11251" r="8645" b="11299">=</wd>

<space/>

<wd l="8707" t="11198" r="9130" b="11352">0.44.</wd>

</ln>

</para>

<para l="6134" t="11448" r="10502" b="13882" alignment="justified" li="72" ri="72" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="6360" t="11448" r="10502" b="11650" baseLine="11602" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6360" t="11448" r="7171" b="11606">Different</wd>

<space/>

<wd l="7243" t="11448" r="7920" b="11650">weights</wd>

<space/>

<wd l="8006" t="11501" r="8304" b="11606">can</wd>

<space/>

<wd l="8376" t="11448" r="8582" b="11606">be</wd>

<space/>

<wd l="8669" t="11448" r="9422" b="11650">assigned</wd>

<space/>

<wd l="9499" t="11472" r="9658" b="11606">to</wd>

<space/>

<wd l="9749" t="11448" r="10502" b="11606">different</wd>

<space/>

</ln>

<ln l="6144" t="11702" r="10502" b="11904" baseLine="11856" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="11702" r="6979" b="11904">similarity</wd>

<space/>

<wd l="7118" t="11702" r="7800" b="11861">features</wd>

<space/>

<wd l="7944" t="11702" r="8424" b="11861">when</wd>

<space/>

<wd l="8563" t="11702" r="9518" b="11904">calculating</wd>

<space/>

<wd l="9667" t="11702" r="10502" b="11904">similarity</wd>

<space/>

</ln>

<ln l="6144" t="11952" r="10493" b="12154" baseLine="12106">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6144" t="12005" r="6677" b="12110">scores</wd>

<space/>

<wd l="6816" t="11952" r="7512" b="12110">because</wd>

<space/>

</run>

<wd l="7656" t="12005" r="8366" b="12154"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">n</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">-grams</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="8520" t="11976" r="8674" b="12110">at</wd>

<space/>

<wd l="8818" t="11952" r="9571" b="12110">different</wd>

<space/>

<wd l="9701" t="11952" r="10493" b="12154">positions</wd>

<space/>

</run>

</ln>

<ln l="6134" t="12206" r="10502" b="12408" baseLine="12360" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="12206" r="6542" b="12365">have</wd>

<space/>

<wd l="6653" t="12206" r="7406" b="12365">different</wd>

<space/>

<wd l="7512" t="12206" r="8486" b="12408">importance</wd>

<space/>

<wd l="8597" t="12206" r="8851" b="12365">for</wd>

<space/>

<wd l="8947" t="12206" r="9398" b="12365">word</wd>

<space/>

<wd l="9499" t="12206" r="10502" b="12408">recognition</wd>

<space/>

</ln>

<ln l="6139" t="12461" r="10493" b="12662" baseLine="12614" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="12461" r="6739" b="12662">(White</wd>

<space/>

<wd l="6811" t="12485" r="6965" b="12619">et</wd>

<space/>

<wd l="7027" t="12461" r="7277" b="12653">al.,</wd>

<space/>

<wd l="7349" t="12461" r="7906" b="12662">2008).</wd>

<space/>

<wd l="7978" t="12470" r="8285" b="12619">For</wd>

<space/>

<wd l="8342" t="12461" r="9130" b="12662">example,</wd>

<space/>

<wd l="9202" t="12461" r="9370" b="12614">in</wd>

<space/>

<wd l="9427" t="12461" r="9691" b="12619">the</wd>

<space/>

<wd l="9758" t="12461" r="10493" b="12662">example</wd>

<space/>

</ln>

<ln l="6144" t="12710" r="10493" b="12912" baseLine="12864" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="12710" r="6710" b="12869">shown</wd>

<space/>

<wd l="6787" t="12710" r="6955" b="12864">in</wd>

<space/>

<wd l="7027" t="12710" r="7517" b="12869">Table</wd>

<space/>

<wd l="7598" t="12715" r="7752" b="12902">2,</wd>

<space/>

<wd l="7834" t="12763" r="8083" b="12869">we</wd>

<space/>

<wd l="8165" t="12763" r="8462" b="12869">can</wd>

<space/>

<wd l="8544" t="12710" r="9091" b="12912">assign</wd>

<space/>

<wd l="9163" t="12710" r="9763" b="12912">weight</wd>

<space/>

<wd l="9835" t="12715" r="9922" b="12869">3</wd>

<space/>

<wd l="10013" t="12734" r="10176" b="12869">to</wd>

<space/>

<wd l="10253" t="12710" r="10493" b="12869">bi-</wd>

</ln>

<ln l="6139" t="12965" r="10502" b="13166" baseLine="13118" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="13018" r="6662" b="13166">grams</wd>

<space/>

<wd l="6778" t="12989" r="6931" b="13123">at</wd>

<space/>

<wd l="7032" t="12965" r="7291" b="13123">the</wd>

<space/>

<wd l="7397" t="12965" r="8275" b="13166">beginning</wd>

<space/>

<wd l="8386" t="12965" r="8698" b="13123">and</wd>

<space/>

<wd l="8803" t="12965" r="9115" b="13123">end</wd>

<space/>

<wd l="9216" t="12965" r="9418" b="13123">of</wd>

<space/>

<wd l="9504" t="12965" r="10075" b="13166">strings</wd>

<space/>

<wd l="10186" t="12965" r="10502" b="13123">and</wd>

<space/>

</ln>

<ln l="6134" t="13219" r="10502" b="13421" baseLine="13373" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="13219" r="6734" b="13421">weight</wd>

<space/>

<wd l="6816" t="13224" r="6874" b="13373">1</wd>

<space/>

<wd l="6960" t="13243" r="7123" b="13378">to</wd>

<space/>

<wd l="7195" t="13219" r="7646" b="13378">other</wd>

<space/>

<wd l="7709" t="13219" r="8443" b="13411">features,</wd>

<space/>

<wd l="8520" t="13219" r="8832" b="13378">and</wd>

<space/>

<wd l="8890" t="13219" r="9269" b="13378">then</wd>

<space/>

<wd l="9331" t="13219" r="9590" b="13378">the</wd>

<space/>

<wd l="9667" t="13219" r="10502" b="13421">similarity</wd>

<space/>

</ln>

<ln l="6144" t="13474" r="10493" b="13632" baseLine="13627" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="13526" r="6595" b="13632">score</wd>

<space/>

<wd l="6739" t="13474" r="7474" b="13632">between</wd>

<space/>

<wd l="7622" t="13474" r="8184" b="13632">“love”</wd>

<space/>

<wd l="8338" t="13474" r="8650" b="13632">and</wd>

<space/>

<wd l="8798" t="13474" r="9581" b="13632">“looove”</wd>

<space/>

<wd l="9725" t="13474" r="10493" b="13632">becomes</wd>

<space/>

</ln>

<ln l="6144" t="13723" r="7301" b="13882" baseLine="13877" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="13723" r="6509" b="13882">8/13</wd>

<space/>

<wd l="6581" t="13781" r="6701" b="13829">=</wd>

<space/>

<wd l="6763" t="13728" r="7301" b="13882">0.615.</wd>

</ln>

</para>

<para l="6134" t="13978" r="10493" b="14938" alignment="justified" li="72" ri="72" fli="216" lsp="exactly" lspExact="250" language="en">

<ln l="6365" t="13978" r="10493" b="14179" baseLine="14131" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6365" t="13978" r="6696" b="14136">The</wd>

<space/>

<wd l="6802" t="13978" r="7637" b="14179">similarity</wd>

<space/>

<wd l="7728" t="13978" r="8328" b="14136">feature</wd>

<space/>

<wd l="8434" t="14002" r="8669" b="14136">set</wd>

<space/>

<wd l="8765" t="13978" r="9725" b="14136">calculation</wd>

<space/>

<wd l="9816" t="14030" r="10114" b="14136">can</wd>

<space/>

<wd l="10205" t="14030" r="10493" b="14136">use</wd>

<space/>

</ln>

<ln l="6134" t="14232" r="10488" b="14434" baseLine="14386">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6134" t="14232" r="6864" b="14434">multiple</wd>

<space/>

</run>

<wd l="6936" t="14232" r="7162" b="14434"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">n</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="7238" t="14232" r="7402" b="14434"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">k</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">)</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="7478" t="14232" r="8736" b="14434">configurations</wd>

<space/>

<wd l="8813" t="14232" r="9432" b="14390">instead</wd>

<space/>

<wd l="9499" t="14232" r="9701" b="14390">of</wd>

<space/>

<wd l="9725" t="14232" r="10066" b="14434">just</wd>

<space/>

<wd l="10133" t="14285" r="10488" b="14390">one.</wd>

<space/>

</run>

</ln>

<ln l="6134" t="14482" r="10493" b="14683" baseLine="14635" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="14491" r="6446" b="14640">For</wd>

<space/>

<wd l="6557" t="14482" r="7339" b="14683">example,</wd>

<space/>

<wd l="7464" t="14482" r="7728" b="14640">the</wd>

<space/>

<wd l="7853" t="14482" r="8688" b="14683">similarity</wd>

<space/>

<wd l="8803" t="14482" r="9403" b="14640">feature</wd>

<space/>

<wd l="9533" t="14506" r="9768" b="14640">set</wd>

<space/>

<wd l="9883" t="14534" r="10176" b="14640">can</wd>

<space/>

<wd l="10291" t="14482" r="10493" b="14640">be</wd>

<space/>

</ln>

<ln l="6139" t="14736" r="10493" b="14938" baseLine="14890" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="14736" r="7027" b="14938">composed</wd>

<space/>

<wd l="7133" t="14736" r="7334" b="14894">of</wd>

<space/>

<wd l="7406" t="14736" r="8165" b="14938">bigrams,</wd>

<space/>

<wd l="8280" t="14736" r="9053" b="14938">trigrams,</wd>

<space/>

<wd l="9192" t="14736" r="10493" b="14938">1-skip-bigrams</wd>

</ln>

</para>

</column>

</section>

<dd l="7195" t="8818" r="7814" b="9024" anchorTo="toPage" refTo="_1_2_184">

<picture l="7195" t="8818" r="7814" b="9024" alignment="left">

</picture>

</dd>

<dd l="9192" t="9062" r="10445" b="9326" anchorTo="toPage" refTo="_1_2_185">

<picture l="9192" t="9062" r="10445" b="9326" alignment="left">

</picture>

</dd>

<dd l="6019" t="9370" r="7862" b="9868">

<picture l="7622" t="9586" r="7766" b="9658" alignment="left" li="1603" ri="96" spaceBefore="216" spaceAfter="210">

</picture>

</dd>

<dd l="5776" t="15746" r="6176" b="15975">

<para l="5809" t="15792" r="6143" b="15946" alignment="left" lsp="exactly" lspExact="223" language="en">

<ln l="5875" t="15792" r="6077" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="950" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="28">

<wd l="5875" t="15792" r="6077" b="15946">88</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4313.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1323" marginTop="1427" marginRight="1283" marginBottom="1292" offsetX="-14" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1323" t="1427" r="10626" b="15440">

<column l="1323" t="1427" r="5917" b="15440">

<para l="1430" t="1474" r="5784" b="1930" alignment="justified" li="72" ri="72" spaceBefore="5" lsp="exactly" lspExact="253" language="en">

<ln l="1430" t="1474" r="5784" b="1675" baseLine="1627">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1430" t="1474" r="1742" b="1632">and</wd>

<space/>

<wd l="1800" t="1474" r="3125" b="1675">2-skip-bigrams</wd>

<space/>

<wd l="3187" t="1474" r="3950" b="1675">together.</wd>

<space/>

<wd l="4022" t="1474" r="4190" b="1627">If</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4219" t="1474" r="4325" b="1632">k</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4373" t="1531" r="4493" b="1579">=</wd>

<space/>

<wd l="4560" t="1478" r="4709" b="1666">0,</wd>

<space/>

<wd l="4781" t="1474" r="4901" b="1632">it</wd>

<space/>

<wd l="4954" t="1526" r="5506" b="1632">means</wd>

<space/>

<wd l="5573" t="1526" r="5784" b="1632">no</wd>

<space/>

</run>

</ln>

<ln l="1435" t="1728" r="3024" b="1930" baseLine="1882" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1435" t="1728" r="2314" b="1930">skip-gram</wd>

<space/>

<wd l="2376" t="1728" r="2510" b="1886">is</wd>

<space/>

<wd l="2573" t="1728" r="3024" b="1886">used.</wd>

</ln>

</para>

<para l="1421" t="1982" r="5813" b="12302" alignment="justified" li="72" ri="72" spaceBefore="50" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="1656" t="1982" r="5784" b="2184" baseLine="2136" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="1982" r="2035" b="2141">This</wd>

<space/>

<wd l="2112" t="1982" r="2947" b="2184">similarity</wd>

<space/>

<wd l="3010" t="2006" r="4181" b="2141">measurement</wd>

<space/>

<wd l="4234" t="1982" r="5050" b="2184">penalizes</wd>

<space/>

<wd l="5122" t="2006" r="5453" b="2141">text</wd>

<space/>

<wd l="5515" t="1982" r="5784" b="2141">ed-</wd>

</ln>

<ln l="1430" t="2237" r="5779" b="2429" baseLine="2390" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="2237" r="1627" b="2395">its</wd>

<space/>

<wd l="1752" t="2237" r="2146" b="2395">such</wd>

<space/>

<wd l="2256" t="2290" r="2424" b="2395">as</wd>

<space/>

<wd l="2544" t="2237" r="3350" b="2429">insertion,</wd>

<space/>

<wd l="3475" t="2237" r="4181" b="2395">deletion</wd>

<space/>

<wd l="4291" t="2237" r="4603" b="2395">and</wd>

<space/>

<wd l="4718" t="2237" r="5779" b="2395">substitution.</wd>

<space/>

</ln>

<ln l="1430" t="2486" r="5784" b="2688" baseLine="2640" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="2486" r="2342" b="2688">Compared</wd>

<space/>

<wd l="2563" t="2486" r="2957" b="2645">with</wd>

<space/>

<wd l="3178" t="2486" r="4253" b="2645">Levenshtein</wd>

<space/>

<wd l="4478" t="2486" r="5189" b="2645">distance</wd>

<space/>

<wd l="5419" t="2486" r="5784" b="2688">(Le-</wd>

</ln>

<ln l="1426" t="2741" r="5784" b="2942" baseLine="2894" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="2741" r="2314" b="2933">venshtein,</wd>

<space/>

<wd l="2434" t="2741" r="2966" b="2942">1966),</wd>

<space/>

<wd l="3067" t="2794" r="3370" b="2899">one</wd>

<space/>

<wd l="3466" t="2741" r="4603" b="2942">disadvantage</wd>

<space/>

<wd l="4699" t="2741" r="4901" b="2899">of</wd>

<space/>

<wd l="4968" t="2794" r="5261" b="2899">our</wd>

<space/>

<wd l="5347" t="2741" r="5784" b="2899">simi-</wd>

</ln>

<ln l="1430" t="2995" r="5779" b="3197" baseLine="3149" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="2995" r="1891" b="3197">larity</wd>

<space/>

<wd l="1987" t="3019" r="3158" b="3154">measurement</wd>

<space/>

<wd l="3259" t="2995" r="3394" b="3154">is</wd>

<space/>

<wd l="3499" t="2995" r="3830" b="3154">that</wd>

<space/>

<wd l="3926" t="3019" r="4243" b="3154">two</wd>

<space/>

<wd l="4354" t="2995" r="5107" b="3154">different</wd>

<space/>

<wd l="5213" t="2995" r="5779" b="3197">strings</wd>

<space/>

</ln>

<ln l="1426" t="3245" r="5784" b="3446" baseLine="3398" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="3298" r="1805" b="3446">may</wd>

<space/>

<wd l="1886" t="3245" r="2294" b="3403">have</wd>

<space/>

<wd l="2410" t="3250" r="2650" b="3403">1.0</wd>

<space/>

<wd l="2750" t="3245" r="3586" b="3446">similarity</wd>

<space/>

<wd l="3677" t="3298" r="4123" b="3403">score</wd>

<space/>

<wd l="4205" t="3245" r="4901" b="3403">because</wd>

<space/>

<wd l="4987" t="3245" r="5251" b="3403">the</wd>

<space/>

<wd l="5347" t="3245" r="5784" b="3403">simi-</wd>

</ln>

<ln l="1430" t="3499" r="5794" b="3701" baseLine="3653" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="3499" r="1891" b="3701">larity</wd>

<space/>

<wd l="1963" t="3499" r="2563" b="3658">feature</wd>

<space/>

<wd l="2645" t="3523" r="2880" b="3658">set</wd>

<space/>

<wd l="2952" t="3552" r="3254" b="3658">can</wd>

<space/>

<wd l="3322" t="3499" r="3710" b="3701">only</wd>

<space/>

<wd l="3782" t="3523" r="4416" b="3701">capture</wd>

<space/>

<wd l="4498" t="3499" r="4915" b="3658">local</wd>

<space/>

<wd l="4992" t="3499" r="5794" b="3658">character</wd>

<space/>

</ln>

<ln l="1430" t="3754" r="5784" b="3955" baseLine="3907" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="3754" r="1896" b="3912">order</wd>

<space/>

<wd l="1958" t="3754" r="3038" b="3912">information.</wd>

<space/>

<wd l="3110" t="3763" r="3422" b="3912">For</wd>

<space/>

<wd l="3490" t="3754" r="4272" b="3955">example,</wd>

<space/>

<wd l="4358" t="3754" r="4925" b="3955">strings</wd>

<space/>

<wd l="5002" t="3754" r="5784" b="3912">“aaabaa”</wd>

<space/>

</ln>

<ln l="1430" t="4003" r="5794" b="4205" baseLine="4157" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="4003" r="1742" b="4162">and</wd>

<space/>

<wd l="1838" t="4003" r="2717" b="4162">“aaaabaa”</wd>

<space/>

<wd l="2818" t="4003" r="3226" b="4162">have</wd>

<space/>

<wd l="3326" t="4003" r="3955" b="4205">exactly</wd>

<space/>

<wd l="4046" t="4003" r="4310" b="4162">the</wd>

<space/>

<wd l="4416" t="4056" r="4853" b="4162">same</wd>

<space/>

<wd l="4958" t="4003" r="5794" b="4205">similarity</wd>

<space/>

</ln>

<ln l="1430" t="4253" r="5779" b="4459" baseLine="4411" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="4258" r="2030" b="4416">feature</wd>

<space/>

<wd l="2102" t="4282" r="2338" b="4416">set</wd>

<space/>

<wd l="2419" t="4253" r="3043" b="4459">{“$aa”,</wd>

<space/>

<wd l="3110" t="4258" r="3552" b="4450">“ab”,</wd>

<space/>

<wd l="3624" t="4258" r="4066" b="4450">“ba”,</wd>

<space/>

<wd l="4138" t="4253" r="4680" b="4450">“aa$”,</wd>

<space/>

<wd l="4747" t="4258" r="5222" b="4459">“a|a”,</wd>

<space/>

<wd l="5294" t="4258" r="5779" b="4459">“a|b”,</wd>

<space/>

</ln>

<ln l="1430" t="4512" r="5784" b="4714" baseLine="4666" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="4512" r="1954" b="4714">“b|a”}</wd>

<space/>

<wd l="2045" t="4512" r="2362" b="4670">and</wd>

<space/>

<wd l="2424" t="4512" r="2779" b="4670">thus</wd>

<space/>

<wd l="2851" t="4512" r="3259" b="4670">have</wd>

<space/>

<wd l="3355" t="4517" r="3595" b="4670">1.0</wd>

<space/>

<wd l="3677" t="4512" r="4512" b="4714">similarity</wd>

<space/>

<wd l="4584" t="4565" r="5083" b="4670">score.</wd>

<space/>

<wd l="5160" t="4512" r="5784" b="4670">Includ-</wd>

</ln>

<ln l="1430" t="4766" r="5794" b="4968" baseLine="4920">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1430" t="4766" r="1704" b="4968">ing</wd>

<space/>

<wd l="1800" t="4766" r="2678" b="4968">skip-gram</wd>

<space/>

<wd l="2770" t="4766" r="3082" b="4925">and</wd>

<space/>

<wd l="3168" t="4766" r="3638" b="4968">using</wd>

<space/>

<wd l="3730" t="4819" r="3821" b="4925">a</wd>

<space/>

<wd l="3912" t="4766" r="4421" b="4968">larger</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4502" t="4824" r="4603" b="4925">n</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4699" t="4766" r="4867" b="4920">in</wd>

<space/>

<wd l="4958" t="4766" r="5794" b="4968">similarity</wd>

<space/>

</run>

</ln>

<ln l="1430" t="5016" r="5794" b="5218" baseLine="5170" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="5016" r="2030" b="5174">feature</wd>

<space/>

<wd l="2112" t="5016" r="3072" b="5174">calculation</wd>

<space/>

<wd l="3149" t="5069" r="3446" b="5174">can</wd>

<space/>

<wd l="3523" t="5016" r="4238" b="5218">mitigate</wd>

<space/>

<wd l="4315" t="5016" r="4622" b="5174">this</wd>

<space/>

<wd l="4699" t="5016" r="5434" b="5218">problem</wd>

<space/>

<wd l="5506" t="5016" r="5794" b="5174">but</wd>

<space/>

</ln>

<ln l="1430" t="5270" r="5784" b="5472" baseLine="5424" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="5294" r="2016" b="5429">cannot</wd>

<space/>

<wd l="2136" t="5294" r="2798" b="5472">prevent</wd>

<space/>

<wd l="2928" t="5270" r="3091" b="5429">it.</wd>

<space/>

<wd l="3230" t="5270" r="4286" b="5472">Fortunately,</wd>

<space/>

<wd l="4426" t="5270" r="4733" b="5429">this</wd>

<space/>

<wd l="4882" t="5270" r="5458" b="5429">should</wd>

<space/>

<wd l="5578" t="5270" r="5784" b="5429">be</wd>

<space/>

</ln>

<ln l="1426" t="5525" r="5784" b="5726" baseLine="5678" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="5578" r="1819" b="5726">very</wd>

<space/>

<wd l="1939" t="5578" r="2275" b="5683">rare</wd>

<space/>

<wd l="2400" t="5525" r="2880" b="5683">when</wd>

<space/>

<wd l="3000" t="5525" r="3259" b="5683">the</wd>

<space/>

<wd l="3398" t="5525" r="4229" b="5726">similarity</wd>

<space/>

<wd l="4349" t="5549" r="5525" b="5683">measurement</wd>

<space/>

<wd l="5650" t="5525" r="5784" b="5683">is</wd>

<space/>

</ln>

<ln l="1430" t="5774" r="5784" b="5976" baseLine="5928" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="5774" r="2074" b="5976">applied</wd>

<space/>

<wd l="2150" t="5798" r="2314" b="5933">to</wd>

<space/>

<wd l="2395" t="5798" r="2717" b="5933">two</wd>

<space/>

<wd l="2803" t="5774" r="3130" b="5933">real</wd>

<space/>

<wd l="3206" t="5774" r="3720" b="5933">world</wd>

<space/>

<wd l="3797" t="5774" r="4373" b="5933">twitter</wd>

<space/>

<wd l="4445" t="5774" r="5011" b="5933">tokens</wd>

<space/>

<wd l="5093" t="5774" r="5784" b="5933">because</wd>

<space/>

</ln>

<ln l="1435" t="6029" r="5784" b="6230" baseLine="6182" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1435" t="6029" r="1829" b="6187">such</wd>

<space/>

<wd l="1896" t="6082" r="2347" b="6187">cases</wd>

<space/>

<wd l="2414" t="6029" r="3034" b="6230">require</wd>

<space/>

<wd l="3096" t="6029" r="3360" b="6187">the</wd>

<space/>

<wd l="3437" t="6029" r="4003" b="6230">strings</wd>

<space/>

<wd l="4075" t="6053" r="4238" b="6187">to</wd>

<space/>

<wd l="4301" t="6029" r="4507" b="6187">be</wd>

<space/>

<wd l="4579" t="6029" r="4958" b="6230">long</wd>

<space/>

<wd l="5030" t="6029" r="5342" b="6187">and</wd>

<space/>

<wd l="5410" t="6082" r="5784" b="6187">con-</wd>

</ln>

<ln l="1426" t="6283" r="5784" b="6485" baseLine="6437">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1426" t="6283" r="1757" b="6442">tain</wd>

<space/>

<wd l="1838" t="6283" r="2659" b="6485">repetitive</wd>

<space/>

</run>

<wd l="2750" t="6336" r="3461" b="6485"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">n</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">-grams</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="3557" t="6283" r="3869" b="6442">and</wd>

<space/>

<wd l="3960" t="6283" r="4968" b="6485">skip-grams.</wd>

<space/>

<wd l="5069" t="6288" r="5424" b="6442">One</wd>

<space/>

<wd l="5515" t="6283" r="5784" b="6442">ad-</wd>

</run>

</ln>

<ln l="1426" t="6533" r="5784" b="6734" baseLine="6686" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="6557" r="2102" b="6734">vantage</wd>

<space/>

<wd l="2198" t="6533" r="2400" b="6691">of</wd>

<space/>

<wd l="2462" t="6586" r="2755" b="6691">our</wd>

<space/>

<wd l="2842" t="6533" r="3677" b="6734">similarity</wd>

<space/>

<wd l="3758" t="6557" r="4934" b="6691">measurement</wd>

<space/>

<wd l="5021" t="6586" r="5410" b="6691">over</wd>

<space/>

<wd l="5486" t="6542" r="5784" b="6691">Le-</wd>

</ln>

<ln l="1426" t="6787" r="5784" b="6946" baseLine="6941" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="6787" r="2270" b="6946">venshtein</wd>

<space/>

<wd l="2333" t="6787" r="3038" b="6946">distance</wd>

<space/>

<wd l="3110" t="6787" r="3240" b="6946">is</wd>

<space/>

<wd l="3307" t="6787" r="3638" b="6946">that</wd>

<space/>

<wd l="3701" t="6787" r="3816" b="6946">it</wd>

<space/>

<wd l="3874" t="6787" r="4315" b="6946">takes</wd>

<space/>

<wd l="4387" t="6787" r="4714" b="6946">into</wd>

<space/>

<wd l="4786" t="6811" r="5467" b="6946">account</wd>

<space/>

<wd l="5520" t="6787" r="5784" b="6946">the</wd>

<space/>

</ln>

<ln l="1435" t="7042" r="5789" b="7243" baseLine="7195" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1435" t="7042" r="1925" b="7243">string</wd>

<space/>

<wd l="2064" t="7042" r="2606" b="7243">length</wd>

<space/>

<wd l="2736" t="7042" r="3216" b="7200">when</wd>

<space/>

<wd l="3341" t="7042" r="4258" b="7243">penalizing</wd>

<space/>

<wd l="4392" t="7066" r="4723" b="7200">text</wd>

<space/>

<wd l="4858" t="7042" r="5309" b="7200">edits.</wd>

<space/>

<wd l="5458" t="7042" r="5789" b="7200">The</wd>

<space/>

</ln>

<ln l="1435" t="7296" r="5784" b="7498" baseLine="7450" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1435" t="7349" r="1872" b="7454">same</wd>

<space/>

<wd l="1939" t="7320" r="2270" b="7454">text</wd>

<space/>

<wd l="2333" t="7296" r="2659" b="7454">edit</wd>

<space/>

<wd l="2717" t="7296" r="3000" b="7454">has</wd>

<space/>

<wd l="3072" t="7349" r="3163" b="7454">a</wd>

<space/>

<wd l="3221" t="7296" r="3792" b="7498">bigger</wd>

<space/>

<wd l="3850" t="7296" r="4445" b="7498">impact</wd>

<space/>

<wd l="4502" t="7296" r="4982" b="7454">when</wd>

<space/>

<wd l="5045" t="7296" r="5160" b="7454">it</wd>

<space/>

<wd l="5227" t="7349" r="5784" b="7454">occurs</wd>

<space/>

</ln>

<ln l="1430" t="7546" r="5813" b="7747" baseLine="7699" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="7546" r="1598" b="7699">in</wd>

<space/>

<wd l="1675" t="7598" r="1766" b="7704">a</wd>

<space/>

<wd l="1853" t="7546" r="2285" b="7704">short</wd>

<space/>

<wd l="2366" t="7546" r="2851" b="7747">string</wd>

<space/>

<wd l="2928" t="7546" r="3302" b="7704">than</wd>

<space/>

<wd l="3389" t="7546" r="3552" b="7699">in</wd>

<space/>

<wd l="3634" t="7598" r="3725" b="7704">a</wd>

<space/>

<wd l="3802" t="7546" r="4186" b="7747">long</wd>

<space/>

<wd l="4272" t="7546" r="4762" b="7747">string</wd>

<space/>

<wd l="4834" t="7546" r="5530" b="7704">because</wd>

<space/>

<wd l="5611" t="7546" r="5813" b="7704">of</wd>

<space/>

</ln>

<ln l="1426" t="7800" r="5784" b="7958" baseLine="7954" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="7800" r="1690" b="7958">the</wd>

<space/>

<wd l="1800" t="7800" r="2909" b="7958">denominator</wd>

<space/>

<wd l="3010" t="7800" r="3178" b="7954">in</wd>

<space/>

<wd l="3278" t="7800" r="3936" b="7958">Jaccard</wd>

<space/>

<wd l="4042" t="7800" r="4579" b="7958">Index.</wd>

<space/>

<wd l="4690" t="7800" r="5414" b="7958">Another</wd>

<space/>

<wd l="5515" t="7800" r="5784" b="7958">ad-</wd>

</ln>

<ln l="1426" t="8054" r="5794" b="8256" baseLine="8208" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="8078" r="2102" b="8256">vantage</wd>

<space/>

<wd l="2198" t="8054" r="2400" b="8213">of</wd>

<space/>

<wd l="2467" t="8107" r="2760" b="8213">our</wd>

<space/>

<wd l="2851" t="8054" r="3686" b="8256">similarity</wd>

<space/>

<wd l="3768" t="8078" r="4944" b="8213">measurement</wd>

<space/>

<wd l="5030" t="8054" r="5165" b="8213">is</wd>

<space/>

<wd l="5256" t="8054" r="5587" b="8213">that</wd>

<space/>

<wd l="5674" t="8054" r="5794" b="8213">it</wd>

<space/>

</ln>

<ln l="1421" t="8304" r="5784" b="8506" baseLine="8458" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="8304" r="1930" b="8462">better</wd>

<space/>

<wd l="2069" t="8304" r="2736" b="8462">handles</wd>

<space/>

<wd l="2885" t="8304" r="3730" b="8506">repetition</wd>

<space/>

<wd l="3878" t="8304" r="4810" b="8496">characters,</wd>

<space/>

<wd l="4963" t="8304" r="5501" b="8462">which</wd>

<space/>

<wd l="5650" t="8304" r="5784" b="8462">is</wd>

<space/>

</ln>

<ln l="1430" t="8558" r="5794" b="8760" baseLine="8712" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="8558" r="2366" b="8760">commonly</wd>

<space/>

<wd l="2438" t="8558" r="2842" b="8717">used</wd>

<space/>

<wd l="2914" t="8558" r="3082" b="8712">in</wd>

<space/>

<wd l="3158" t="8558" r="3845" b="8717">Twitter.</wd>

<space/>

<wd l="3926" t="8568" r="4234" b="8717">For</wd>

<space/>

<wd l="4306" t="8558" r="5088" b="8760">example,</wd>

<space/>

<wd l="5174" t="8558" r="5434" b="8717">for</wd>

<space/>

<wd l="5506" t="8611" r="5794" b="8717">our</wd>

<space/>

</ln>

<ln l="1435" t="8813" r="5794" b="9014" baseLine="8966" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1435" t="8813" r="2270" b="9014">similarity</wd>

<space/>

<wd l="2467" t="8837" r="3686" b="9005">measurement,</wd>

<space/>

<wd l="3888" t="8813" r="4286" b="8971">both</wd>

<space/>

<wd l="4488" t="8813" r="5270" b="8971">“looove”</wd>

<space/>

<wd l="5482" t="8813" r="5794" b="8971">and</wd>

<space/>

</ln>

<ln l="1430" t="9067" r="5794" b="9269" baseLine="9221" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="9067" r="2544" b="9226">“loooooove”</wd>

<space/>

<wd l="2659" t="9120" r="2914" b="9226">are</wd>

<space/>

<wd l="3029" t="9067" r="3667" b="9269">equally</wd>

<space/>

<wd l="3778" t="9067" r="4382" b="9226">similar</wd>

<space/>

<wd l="4483" t="9091" r="4646" b="9226">to</wd>

<space/>

<wd l="4762" t="9067" r="5371" b="9226">“love”.</wd>

<space/>

<wd l="5486" t="9077" r="5794" b="9226">For</wd>

<space/>

</ln>

<ln l="1426" t="9317" r="5794" b="9509" baseLine="9470" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="9317" r="2501" b="9475">Levenshtein</wd>

<space/>

<wd l="2578" t="9317" r="3336" b="9509">distance,</wd>

<space/>

<wd l="3422" t="9317" r="4536" b="9475">“loooooove”</wd>

<space/>

<wd l="4613" t="9317" r="5054" b="9475">takes</wd>

<space/>

<wd l="5141" t="9370" r="5232" b="9475">a</wd>

<space/>

<wd l="5304" t="9317" r="5794" b="9475">much</wd>

<space/>

</ln>

<ln l="1426" t="9528" r="5784" b="9773" baseLine="9720">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1426" t="9571" r="2078" b="9730">heavier</wd>

<space/>

<wd l="2160" t="9571" r="2813" b="9773">penalty</wd>

<space/>

<wd l="2904" t="9571" r="3278" b="9730">than</wd>

<space/>

</run>

<wd l="3379" t="9528" r="4277" b="9730"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">“looove”.</run>

<run underlined="none" subsuperscript="superscript" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">1</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="4354" t="9571" r="4685" b="9730">The</wd>

<space/>

<wd l="4781" t="9571" r="5419" b="9773">biggest</wd>

<space/>

<wd l="5515" t="9571" r="5784" b="9730">ad-</wd>

</run>

</ln>

<ln l="1426" t="9826" r="5784" b="10027" baseLine="9979" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="9850" r="2102" b="10027">vantage</wd>

<space/>

<wd l="2198" t="9826" r="2400" b="9984">of</wd>

<space/>

<wd l="2462" t="9878" r="2755" b="9984">our</wd>

<space/>

<wd l="2842" t="9826" r="3677" b="10027">similarity</wd>

<space/>

<wd l="3758" t="9850" r="4934" b="9984">measurement</wd>

<space/>

<wd l="5021" t="9878" r="5410" b="9984">over</wd>

<space/>

<wd l="5486" t="9835" r="5784" b="9984">Le-</wd>

</ln>

<ln l="1426" t="10075" r="5789" b="10277" baseLine="10229" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="10075" r="2270" b="10234">venshtein</wd>

<space/>

<wd l="2400" t="10075" r="3110" b="10234">distance</wd>

<space/>

<wd l="3245" t="10075" r="3379" b="10234">is</wd>

<space/>

<wd l="3514" t="10075" r="3773" b="10234">the</wd>

<space/>

<wd l="3912" t="10075" r="4411" b="10234">lower</wd>

<space/>

<wd l="4536" t="10075" r="5789" b="10277">computational</wd>

<space/>

</ln>

<ln l="1430" t="10330" r="5779" b="10531" baseLine="10483">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1430" t="10330" r="2458" b="10531">complexity.</wd>

<space/>

<wd l="2558" t="10339" r="2851" b="10488">Let</wd>

<space/>

<wd l="2938" t="10330" r="3202" b="10488">the</wd>

<space/>

<wd l="3298" t="10330" r="3845" b="10531">length</wd>

<space/>

<wd l="3936" t="10330" r="4138" b="10488">of</wd>

<space/>

<wd l="4210" t="10382" r="4301" b="10488">a</wd>

<space/>

<wd l="4397" t="10330" r="4886" b="10531">string</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4973" t="10387" r="5059" b="10488">s</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="5146" t="10330" r="5347" b="10488">be</wd>

<space/>

</run>

<wd l="5448" t="10330" r="5779" b="10531"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">l</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">s</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">).</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

</ln>

<ln l="1430" t="10584" r="5779" b="10786" baseLine="10738">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1430" t="10584" r="1762" b="10742">The</wd>

<space/>

<wd l="1858" t="10584" r="2458" b="10742">feature</wd>

<space/>

<wd l="2563" t="10608" r="2798" b="10742">set</wd>

<space/>

<wd l="2894" t="10584" r="3221" b="10742">size</wd>

<space/>

<wd l="3317" t="10584" r="3518" b="10742">of</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3576" t="10642" r="3662" b="10742">s</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3758" t="10584" r="3893" b="10742">is</wd>

<space/>

<wd l="3984" t="10584" r="4742" b="10742">bounded</wd>

<space/>

<wd l="4824" t="10584" r="5050" b="10786">by</wd>

<space/>

</run>

<wd l="5146" t="10584" r="5779" b="10786"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">O</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">l</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">s</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">)).</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

</ln>

<ln l="1430" t="10834" r="5794" b="11035" baseLine="10987" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="10834" r="1877" b="10992">Then</wd>

<space/>

<wd l="1963" t="10834" r="2227" b="10992">the</wd>

<space/>

<wd l="2323" t="10834" r="3312" b="11035">complexity</wd>

<space/>

<wd l="3398" t="10834" r="3600" b="10992">of</wd>

<space/>

<wd l="3672" t="10834" r="4627" b="11035">calculating</wd>

<space/>

<wd l="4718" t="10834" r="5794" b="10992">Levenshtein</wd>

<space/>

</ln>

<ln l="1430" t="11088" r="5794" b="11290" baseLine="11245">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1430" t="11088" r="2141" b="11246">distance</wd>

<space/>

<wd l="2218" t="11088" r="2952" b="11246">between</wd>

<space/>

</run>

<wd l="3019" t="11146" r="3173" b="11270"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">s</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">1</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3259" t="11088" r="3571" b="11246">and</wd>

<space/>

</run>

<wd l="3638" t="11146" r="3797" b="11270"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">s</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">2</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3878" t="11088" r="4013" b="11246">is</wd>

<space/>

</run>

<wd l="4104" t="11088" r="5170" b="11290"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">O</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">l</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">s</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">1</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">)</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">l</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">s</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">2</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">)),</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="5256" t="11088" r="5794" b="11246">which</wd>

<space/>

</run>

</ln>

<ln l="1430" t="11342" r="5794" b="11544" baseLine="11496" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="11342" r="1565" b="11501">is</wd>

<space/>

<wd l="1742" t="11342" r="2549" b="11544">quadratic</wd>

<space/>

<wd l="2717" t="11342" r="3197" b="11501">when</wd>

<space/>

<wd l="3360" t="11366" r="3677" b="11501">two</wd>

<space/>

<wd l="3859" t="11342" r="4430" b="11544">strings</wd>

<space/>

<wd l="4598" t="11342" r="5011" b="11501">have</wd>

<space/>

<wd l="5189" t="11342" r="5794" b="11501">similar</wd>

<space/>

</ln>

<ln l="1430" t="11597" r="5784" b="11798" baseLine="11750" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="11597" r="2021" b="11798">length.</wd>

<space/>

<wd l="2102" t="11602" r="2366" b="11755">On</wd>

<space/>

<wd l="2429" t="11597" r="2693" b="11755">the</wd>

<space/>

<wd l="2770" t="11621" r="3542" b="11798">contrary,</wd>

<space/>

<wd l="3619" t="11597" r="3878" b="11755">the</wd>

<space/>

<wd l="3955" t="11597" r="4944" b="11798">complexity</wd>

<space/>

<wd l="5011" t="11597" r="5213" b="11755">of</wd>

<space/>

<wd l="5261" t="11597" r="5784" b="11755">calcu-</wd>

</ln>

<ln l="1430" t="11846" r="5784" b="12048" baseLine="12000" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="11846" r="1925" b="12048">lating</wd>

<space/>

<wd l="2285" t="11899" r="2578" b="12005">our</wd>

<space/>

<wd l="2933" t="11846" r="3768" b="12048">similarity</wd>

<space/>

<wd l="4118" t="11870" r="5294" b="12005">measurement</wd>

<space/>

<wd l="5650" t="11846" r="5784" b="12005">is</wd>

<space/>

</ln>

<ln l="1435" t="12058" r="4109" b="12302" baseLine="12249">

<wd l="1435" t="12101" r="2626" b="12302"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">O</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">l</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">s</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">1</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">)+</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">l</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">s</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">2</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">)),</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="2693" t="12101" r="3230" b="12259">which</wd>

<space/>

<wd l="3288" t="12101" r="3422" b="12259">is</wd>

<space/>

</run>

<wd l="3490" t="12058" r="4109" b="12259"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">linear.</run>

<run underlined="none" subsuperscript="superscript" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">2</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

<para l="1430" t="12355" r="5784" b="12806" alignment="justified" li="72" ri="72" spaceAfter="218" fli="216" lsp="exactly" lspExact="240" language="en">

<ln l="1651" t="12355" r="5784" b="12514" baseLine="12509" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="12365" r="1954" b="12514">We</wd>

<space/>

<wd l="2035" t="12355" r="2515" b="12514">index</wd>

<space/>

<wd l="2597" t="12355" r="2808" b="12514">all</wd>

<space/>

<wd l="2885" t="12355" r="3149" b="12514">the</wd>

<space/>

<wd l="3230" t="12355" r="4066" b="12514">canonical</wd>

<space/>

<wd l="4147" t="12355" r="4646" b="12514">forms</wd>

<space/>

<wd l="4733" t="12355" r="4901" b="12509">in</wd>

<space/>

<wd l="4973" t="12355" r="5237" b="12514">the</wd>

<space/>

<wd l="5314" t="12355" r="5784" b="12514">train-</wd>

</ln>

<ln l="1430" t="12605" r="5784" b="12806" baseLine="12758" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="12605" r="1704" b="12806">ing</wd>

<space/>

<wd l="1786" t="12605" r="2146" b="12763">data</wd>

<space/>

<wd l="2218" t="12605" r="2722" b="12763">based</wd>

<space/>

<wd l="2803" t="12658" r="3014" b="12763">on</wd>

<space/>

<wd l="3101" t="12605" r="3936" b="12806">similarity</wd>

<space/>

<wd l="4013" t="12605" r="4694" b="12763">features</wd>

<space/>

<wd l="4776" t="12629" r="4939" b="12763">to</wd>

<space/>

<wd l="5026" t="12605" r="5784" b="12763">facilitate</wd>

</ln>

</para>

<rulerline l="1426" t="13042" r="4310" b="13042" type="single" width="14" color="000000"/>

<para l="1426" t="13162" r="5760" b="14189" alignment="left" li="72" ri="72" spaceBefore="131" lsp="exactly" lspExact="206" language="en">

<ln l="1435" t="13162" r="5530" b="13363" baseLine="13322">

<run underlined="none" subsuperscript="none" fontSize="600" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1435" t="13162" r="1474" b="13243">1</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1536" t="13200" r="2064" b="13330">Certain</wd>

<space/>

<wd l="2102" t="13200" r="3101" b="13363">preprocessing</wd>

<space/>

<wd l="3154" t="13238" r="3398" b="13330">can</wd>

<space/>

<wd l="3446" t="13200" r="4032" b="13363">mitigate</wd>

<space/>

<wd l="4080" t="13200" r="4339" b="13330">this</wd>

<space/>

<wd l="4382" t="13200" r="4987" b="13363">problem</wd>

<space/>

<wd l="5035" t="13200" r="5242" b="13330">for</wd>

<space/>

<wd l="5285" t="13205" r="5530" b="13330">Le-</wd>

</run>

</ln>

<ln l="1426" t="13406" r="5702" b="13570" baseLine="13531" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="13406" r="2117" b="13536">venshtein</wd>

<space/>

<wd l="2165" t="13406" r="2784" b="13536">distance.</wd>

<space/>

<wd l="2842" t="13411" r="3096" b="13536">For</wd>

<space/>

<wd l="3139" t="13406" r="3787" b="13570">example,</wd>

<space/>

<wd l="3840" t="13406" r="4013" b="13536">all</wd>

<space/>

<wd l="4066" t="13406" r="4488" b="13570">single</wd>

<space/>

<wd l="4541" t="13406" r="5198" b="13536">character</wd>

<space/>

<wd l="5242" t="13406" r="5702" b="13570">repeti-</wd>

</ln>

<ln l="1426" t="13613" r="5693" b="13776" baseLine="13738" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="13613" r="1771" b="13742">tions</wd>

<space/>

<wd l="1824" t="13632" r="2045" b="13776">get</wd>

<space/>

<wd l="2088" t="13613" r="2659" b="13742">reduced</wd>

<space/>

<wd l="2702" t="13632" r="2837" b="13742">to</wd>

<space/>

<wd l="2885" t="13632" r="3154" b="13742">two</wd>

<space/>

<wd l="3197" t="13613" r="3658" b="13742">before</wd>

<space/>

<wd l="3706" t="13613" r="4584" b="13742">Levenshtein</wd>

<space/>

<wd l="4637" t="13613" r="5218" b="13742">distance</wd>

<space/>

<wd l="5270" t="13613" r="5381" b="13742">is</wd>

<space/>

<wd l="5434" t="13613" r="5693" b="13742">cal-</wd>

</ln>

<ln l="1430" t="13819" r="5760" b="13982" baseLine="13944" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="13819" r="1978" b="13949">culated.</wd>

<space/>

<wd l="2035" t="13824" r="2299" b="13949">But</wd>

<space/>

<wd l="2347" t="13819" r="2443" b="13949">it</wd>

<space/>

<wd l="2491" t="13819" r="2813" b="13949">does</wd>

<space/>

<wd l="2861" t="13838" r="3096" b="13949">not</wd>

<space/>

<wd l="3139" t="13819" r="3614" b="13949">handle</wd>

<space/>

<wd l="3662" t="13819" r="4354" b="13982">repetition</wd>

<space/>

<wd l="4402" t="13819" r="4570" b="13949">of</wd>

<space/>

<wd l="4594" t="13819" r="5189" b="13982">multiple</wd>

<space/>

<wd l="5242" t="13819" r="5760" b="13949">charac-</wd>

</ln>

<ln l="1426" t="14026" r="2606" b="14189" baseLine="14150" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="14045" r="1723" b="14184">ters,</wd>

<space/>

<wd l="1781" t="14064" r="2026" b="14189">e.g.</wd>

<space/>

<wd l="2088" t="14026" r="2606" b="14155">“lolol”.</wd>

</ln>

</para>

<para l="1426" t="14194" r="5765" b="15432" alignment="justified" li="72" ri="72" spaceBefore="8" lsp="exactly" lspExact="205" language="en">

<ln l="1426" t="14194" r="5678" b="14395" baseLine="14354">

<run underlined="none" subsuperscript="none" fontSize="600" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1426" t="14194" r="1483" b="14275">2</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1536" t="14232" r="1810" b="14362">The</wd>

<space/>

<wd l="1862" t="14232" r="2270" b="14362">linear</wd>

<space/>

<wd l="2314" t="14232" r="3120" b="14395">complexity</wd>

<space/>

<wd l="3173" t="14232" r="3754" b="14395">depends</wd>

<space/>

<wd l="3806" t="14270" r="3984" b="14362">on</wd>

<space/>

<wd l="4027" t="14232" r="4411" b="14395">using</wd>

<space/>

<wd l="4459" t="14232" r="4790" b="14362">hash</wd>

<space/>

<wd l="4834" t="14232" r="5184" b="14362">table</wd>

<space/>

<wd l="5232" t="14251" r="5366" b="14362">to</wd>

<space/>

<wd l="5419" t="14232" r="5678" b="14362">cal-</wd>

</run>

</ln>

<ln l="1430" t="14438" r="5765" b="14602" baseLine="14563" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="14438" r="1853" b="14568">culate</wd>

<space/>

<wd l="1906" t="14458" r="2102" b="14568">set</wd>

<space/>

<wd l="2146" t="14438" r="2558" b="14568">union</wd>

<space/>

<wd l="2606" t="14438" r="2861" b="14568">and</wd>

<space/>

<wd l="2914" t="14438" r="3782" b="14568">intersection.</wd>

<space/>

<wd l="3835" t="14438" r="4430" b="14568">Another</wd>

<space/>

<wd l="4478" t="14438" r="5602" b="14602">implementation</wd>

<space/>

<wd l="5650" t="14438" r="5765" b="14568">is</wd>

<space/>

</ln>

<ln l="1430" t="14645" r="5650" b="14808" baseLine="14770" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="14645" r="1925" b="14808">sorting</wd>

<space/>

<wd l="1973" t="14645" r="2189" b="14774">the</wd>

<space/>

<wd l="2242" t="14645" r="2928" b="14808">similarity</wd>

<space/>

<wd l="2976" t="14645" r="3538" b="14774">features</wd>

<space/>

<wd l="3590" t="14645" r="3878" b="14774">first</wd>

<space/>

<wd l="3926" t="14645" r="4181" b="14774">and</wd>

<space/>

<wd l="4224" t="14645" r="4536" b="14774">then</wd>

<space/>

<wd l="4584" t="14645" r="5366" b="14808">calculating</wd>

<space/>

<wd l="5414" t="14683" r="5650" b="14774">un-</wd>

</ln>

<ln l="1430" t="14856" r="5669" b="15019" baseLine="14981">

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1430" t="14856" r="1656" b="14986">ion</wd>

<space/>

<wd l="1704" t="14856" r="1963" b="14986">and</wd>

<space/>

<wd l="2011" t="14856" r="2885" b="15014">intersection,</wd>

<space/>

<wd l="2938" t="14856" r="3379" b="14986">which</wd>

<space/>

<wd l="3422" t="14856" r="3658" b="14986">has</wd>

<space/>

</run>

<wd l="3715" t="14856" r="4488" b="15019"><run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">O</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">l</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">*</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">log</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">l</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">))</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="4546" t="14856" r="5352" b="15019">complexity</wd>

<space/>

</run>

<wd l="5400" t="14856" r="5515" b="15019"><run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">l</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="5554" t="14856" r="5669" b="14986">is</wd>

<space/>

</run>

</ln>

<ln l="1426" t="15062" r="5702" b="15226" baseLine="15187" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="15062" r="1642" b="15192">the</wd>

<space/>

<wd l="1694" t="15062" r="2155" b="15226">longer</wd>

<space/>

<wd l="2203" t="15062" r="2602" b="15226">string</wd>

<space/>

<wd l="2654" t="15062" r="3101" b="15226">length</wd>

<space/>

<wd l="3149" t="15062" r="3317" b="15192">of</wd>

<space/>

<wd l="3341" t="15062" r="3557" b="15192">the</wd>

<space/>

<wd l="3605" t="15082" r="3874" b="15192">two</wd>

<space/>

<wd l="3926" t="15062" r="4454" b="15226">strings)</wd>

<space/>

<wd l="4512" t="15062" r="4766" b="15192">and</wd>

<space/>

<wd l="4814" t="15062" r="4930" b="15192">is</wd>

<space/>

<wd l="4982" t="15062" r="5246" b="15192">still</wd>

<space/>

<wd l="5285" t="15062" r="5702" b="15192">better</wd>

<space/>

</ln>

<ln l="1426" t="15269" r="5093" b="15432" baseLine="15394" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="15269" r="1738" b="15398">than</wd>

<space/>

<wd l="1786" t="15269" r="2448" b="15432">quadratic</wd>

<space/>

<wd l="2501" t="15269" r="3307" b="15432">complexity</wd>

<space/>

<wd l="3355" t="15269" r="3523" b="15398">of</wd>

<space/>

<wd l="3547" t="15269" r="4426" b="15398">Levenshtein</wd>

<space/>

<wd l="4474" t="15269" r="5093" b="15398">distance.</wd>

</ln>

</para>

</column>

<column l="6032" t="1427" r="10626" b="15259">

<para l="6130" t="1474" r="10502" b="3955" alignment="justified" li="72" ri="72" spaceBefore="6" lsp="exactly" lspExact="253" language="en">

<ln l="6139" t="1474" r="10502" b="1675" baseLine="1627">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6139" t="1474" r="6768" b="1675">finding</wd>

<space/>

</run>

<wd l="6912" t="1498" r="7411" b="1675"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">top-</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">m</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7565" t="1474" r="8400" b="1632">canonical</wd>

<space/>

<wd l="8549" t="1474" r="9048" b="1632">forms</wd>

<space/>

<wd l="9197" t="1474" r="9528" b="1632">that</wd>

<space/>

<wd l="9667" t="1526" r="9926" b="1632">are</wd>

<space/>

<wd l="10070" t="1498" r="10502" b="1632">most</wd>

<space/>

</run>

</ln>

<ln l="6144" t="1728" r="10488" b="1930" baseLine="1882" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="1728" r="6749" b="1886">similar</wd>

<space/>

<wd l="6821" t="1752" r="6984" b="1886">to</wd>

<space/>

<wd l="7066" t="1728" r="7325" b="1886">the</wd>

<space/>

<wd l="7411" t="1781" r="7910" b="1930">query</wd>

<space/>

<wd l="7982" t="1728" r="8515" b="1886">token.</wd>

<space/>

<wd l="8602" t="1728" r="9139" b="1886">Given</wd>

<space/>

<wd l="9216" t="1781" r="9307" b="1886">a</wd>

<space/>

<wd l="9389" t="1781" r="9883" b="1930">query</wd>

<space/>

<wd l="9955" t="1728" r="10488" b="1920">token,</wd>

<space/>

</ln>

<ln l="6134" t="1982" r="10493" b="2184" baseLine="2136" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="2035" r="6384" b="2141">we</wd>

<space/>

<wd l="6461" t="2035" r="6763" b="2141">can</wd>

<space/>

<wd l="6830" t="1982" r="7488" b="2184">quickly</wd>

<space/>

<wd l="7550" t="2035" r="8174" b="2141">narrow</wd>

<space/>

<wd l="8242" t="1982" r="8726" b="2141">down</wd>

<space/>

<wd l="8798" t="2035" r="9086" b="2141">our</wd>

<space/>

<wd l="9158" t="1982" r="9710" b="2141">search</wd>

<space/>

<wd l="9787" t="2035" r="10258" b="2184">space</wd>

<space/>

<wd l="10330" t="2006" r="10493" b="2141">to</wd>

<space/>

</ln>

<ln l="6139" t="2237" r="10502" b="2438" baseLine="2390" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="2237" r="6974" b="2395">canonical</wd>

<space/>

<wd l="7051" t="2237" r="7550" b="2395">forms</wd>

<space/>

<wd l="7632" t="2237" r="7963" b="2395">that</wd>

<space/>

<wd l="8040" t="2237" r="8491" b="2395">share</wd>

<space/>

<wd l="8573" t="2261" r="8726" b="2395">at</wd>

<space/>

<wd l="8798" t="2237" r="9202" b="2395">least</wd>

<space/>

<wd l="9274" t="2290" r="9581" b="2395">one</wd>

<space/>

<wd l="9667" t="2237" r="10502" b="2438">similarity</wd>

<space/>

</ln>

<ln l="6139" t="2486" r="10502" b="2688" baseLine="2640" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="2486" r="6739" b="2645">feature</wd>

<space/>

<wd l="6830" t="2486" r="7224" b="2645">with</wd>

<space/>

<wd l="7310" t="2486" r="7574" b="2645">the</wd>

<space/>

<wd l="7670" t="2539" r="8170" b="2688">query</wd>

<space/>

<wd l="8256" t="2486" r="8789" b="2645">token.</wd>

<space/>

<wd l="8885" t="2486" r="9538" b="2645">Further</wd>

<space/>

<wd l="9624" t="2486" r="10502" b="2688">efficiency</wd>

<space/>

</ln>

<ln l="6139" t="2741" r="10493" b="2942" baseLine="2894" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="2741" r="7310" b="2942">improvement</wd>

<space/>

<wd l="7392" t="2794" r="7694" b="2899">can</wd>

<space/>

<wd l="7766" t="2741" r="7973" b="2899">be</wd>

<space/>

<wd l="8064" t="2741" r="8837" b="2899">achieved</wd>

<space/>

<wd l="8914" t="2741" r="9139" b="2942">by</wd>

<space/>

<wd l="9221" t="2741" r="10493" b="2942">approximating</wd>

<space/>

</ln>

<ln l="6134" t="2995" r="10493" b="3197" baseLine="3149" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="2995" r="6398" b="3154">the</wd>

<space/>

<wd l="6470" t="2995" r="7579" b="3154">denominator</wd>

<space/>

<wd l="7642" t="2995" r="7810" b="3149">in</wd>

<space/>

<wd l="7872" t="2995" r="8530" b="3154">Jaccard</wd>

<space/>

<wd l="8597" t="2995" r="9086" b="3154">Index</wd>

<space/>

<wd l="9149" t="2995" r="9658" b="3154">based</wd>

<space/>

<wd l="9720" t="3048" r="9936" b="3154">on</wd>

<space/>

<wd l="10008" t="2995" r="10493" b="3197">string</wd>

<space/>

</ln>

<ln l="6139" t="3245" r="10493" b="3446" baseLine="3398" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="3245" r="6763" b="3446">lengths</wd>

<space/>

<wd l="6854" t="3298" r="7037" b="3403">or</wd>

<space/>

<wd l="7109" t="3245" r="7334" b="3446">by</wd>

<space/>

<wd l="7421" t="3245" r="8227" b="3446">imposing</wd>

<space/>

<wd l="8314" t="3245" r="9283" b="3403">restrictions</wd>

<space/>

<wd l="9379" t="3298" r="9590" b="3403">on</wd>

<space/>

<wd l="9672" t="3245" r="9936" b="3403">the</wd>

<space/>

<wd l="10022" t="3245" r="10493" b="3398">mini-</wd>

</ln>

<ln l="6134" t="3499" r="10502" b="3701" baseLine="3653" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="3552" r="6586" b="3658">mum</wd>

<space/>

<wd l="6667" t="3499" r="7344" b="3658">number</wd>

<space/>

<wd l="7426" t="3499" r="7627" b="3658">of</wd>

<space/>

<wd l="7694" t="3499" r="8530" b="3701">similarity</wd>

<space/>

<wd l="8611" t="3499" r="9298" b="3658">features</wd>

<space/>

<wd l="9384" t="3523" r="9547" b="3658">to</wd>

<space/>

<wd l="9634" t="3499" r="9840" b="3658">be</wd>

<space/>

<wd l="9936" t="3499" r="10502" b="3658">shared</wd>

<space/>

</ln>

<ln l="6130" t="3754" r="8501" b="3955" baseLine="3907" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="3754" r="6355" b="3955">by</wd>

<space/>

<wd l="6413" t="3806" r="6912" b="3955">query</wd>

<space/>

<wd l="6965" t="3754" r="7454" b="3912">token</wd>

<space/>

<wd l="7517" t="3754" r="7829" b="3912">and</wd>

<space/>

<wd l="7882" t="3754" r="8501" b="3912">results.</wd>

</ln>

</para>

<para l="6139" t="4166" r="8765" b="4325" alignment="left" li="72" ri="72" spaceBefore="169" lsp="exactly" lspExact="245" language="en">

<ln l="6139" t="4166" r="8765" b="4325" baseLine="4320" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="9">

<wd l="6139" t="4171" r="6398" b="4325">2.3</wd>

<space/>

<wd l="6715" t="4171" r="7685" b="4325">Candidate</wd>

<space/>

<wd l="7742" t="4171" r="8765" b="4325">Evaluation</wd>

</ln>

</para>

<para l="6130" t="4536" r="10507" b="7987" alignment="justified" li="72" ri="72" spaceBefore="119" lsp="exactly" lspExact="252" language="en">

<ln l="6139" t="4536" r="10493" b="4728" baseLine="4691">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6139" t="4536" r="6672" b="4694">Given</wd>

<space/>

<wd l="6734" t="4589" r="6826" b="4694">a</wd>

<space/>

<wd l="6888" t="4560" r="7363" b="4694">tweet</wd>

<space/>

</run>

<wd l="7435" t="4546" r="7584" b="4728"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">T</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="7661" t="4589" r="7968" b="4694">one</wd>

<space/>

<wd l="8035" t="4536" r="8237" b="4694">of</wd>

<space/>

<wd l="8275" t="4536" r="8472" b="4694">its</wd>

<space/>

<wd l="8534" t="4536" r="9024" b="4694">token</wd>

<space/>

</run>

<wd l="9086" t="4565" r="9182" b="4723"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">i</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="9245" t="4536" r="9557" b="4694">and</wd>

<space/>

<wd l="9619" t="4589" r="9926" b="4694">one</wd>

<space/>

<wd l="9994" t="4536" r="10195" b="4694">of</wd>

<space/>

<wd l="10234" t="4536" r="10493" b="4694">the</wd>

<space/>

</run>

</ln>

<ln l="6134" t="4790" r="10507" b="4992" baseLine="4944">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6134" t="4790" r="6773" b="4949">token’s</wd>

<space/>

<wd l="6874" t="4790" r="7704" b="4949">candidate</wd>

<space/>

</run>

<wd l="7805" t="4848" r="7939" b="4982"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">c</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="8040" t="4843" r="8290" b="4949">we</wd>

<space/>

<wd l="8381" t="4790" r="8784" b="4949">train</wd>

<space/>

<wd l="8880" t="4843" r="8971" b="4949">a</wd>

<space/>

<wd l="9053" t="4790" r="9619" b="4992">binary</wd>

<space/>

<wd l="9710" t="4790" r="10507" b="4949">classifier</wd>

<space/>

</run>

</ln>

<ln l="6134" t="5045" r="10498" b="5246" baseLine="5198">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6134" t="5045" r="6466" b="5203">that</wd>

<space/>

<wd l="6557" t="5045" r="7248" b="5246">predicts</wd>

<space/>

<wd l="7349" t="5045" r="8064" b="5203">whether</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="8160" t="5102" r="8251" b="5203">c</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="8352" t="5045" r="8486" b="5203">is</wd>

<space/>

<wd l="8587" t="5045" r="8851" b="5203">the</wd>

<space/>

<wd l="8957" t="5069" r="9562" b="5203">correct</wd>

<space/>

<wd l="9662" t="5045" r="10498" b="5203">canonical</wd>

<space/>

</run>

</ln>

<ln l="6139" t="5299" r="10493" b="5501" baseLine="5454">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6139" t="5299" r="6562" b="5458">form</wd>

<space/>

<wd l="6629" t="5299" r="6826" b="5458">of</wd>

<space/>

</run>

<wd l="6869" t="5328" r="6965" b="5486"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">i</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7027" t="5299" r="7195" b="5453">in</wd>

<space/>

<wd l="7253" t="5299" r="7517" b="5458">the</wd>

<space/>

<wd l="7579" t="5323" r="8059" b="5458">tweet</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="8131" t="5309" r="8261" b="5453">T</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="8299" t="5299" r="8616" b="5458">and</wd>

<space/>

<wd l="8678" t="5323" r="9312" b="5501">outputs</wd>

<space/>

<wd l="9384" t="5352" r="9475" b="5458">a</wd>

<space/>

<wd l="9538" t="5299" r="10493" b="5458">confidence</wd>

<space/>

</run>

</ln>

<ln l="6144" t="5549" r="10493" b="5750" baseLine="5702" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="5602" r="6595" b="5707">score</wd>

<space/>

<wd l="6701" t="5549" r="6955" b="5707">for</wd>

<space/>

<wd l="7046" t="5549" r="7310" b="5707">the</wd>

<space/>

<wd l="7406" t="5549" r="8347" b="5750">prediction.</wd>

<space/>

<wd l="8453" t="5554" r="9110" b="5750">Among</wd>

<space/>

<wd l="9206" t="5549" r="9470" b="5707">the</wd>

<space/>

<wd l="9576" t="5549" r="10493" b="5707">candidates</wd>

<space/>

</ln>

<ln l="6134" t="5803" r="10493" b="6005" baseLine="5957" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="5803" r="6466" b="5962">that</wd>

<space/>

<wd l="6557" t="5803" r="6821" b="5962">the</wd>

<space/>

<wd l="6926" t="5803" r="7718" b="5962">classifier</wd>

<space/>

<wd l="7805" t="5803" r="8496" b="6005">predicts</wd>

<space/>

<wd l="8597" t="5827" r="8760" b="5962">to</wd>

<space/>

<wd l="8861" t="5803" r="9067" b="5962">be</wd>

<space/>

<wd l="9163" t="5803" r="9427" b="5962">the</wd>

<space/>

<wd l="9533" t="5827" r="10138" b="5962">correct</wd>

<space/>

<wd l="10238" t="5856" r="10493" b="5962">ca-</wd>

</ln>

<ln l="6134" t="6058" r="10502" b="6259" baseLine="6211" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="6058" r="6778" b="6216">nonical</wd>

<space/>

<wd l="6850" t="6058" r="7402" b="6250">forms,</wd>

<space/>

<wd l="7478" t="6110" r="7728" b="6216">we</wd>

<space/>

<wd l="7810" t="6058" r="8299" b="6216">select</wd>

<space/>

<wd l="8366" t="6058" r="8626" b="6216">the</wd>

<space/>

<wd l="8702" t="6110" r="9010" b="6216">one</wd>

<space/>

<wd l="9077" t="6058" r="9470" b="6216">with</wd>

<space/>

<wd l="9533" t="6058" r="9797" b="6216">the</wd>

<space/>

<wd l="9864" t="6058" r="10502" b="6259">highest</wd>

<space/>

</ln>

<ln l="6139" t="6307" r="10502" b="6494" baseLine="6461">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6139" t="6307" r="7094" b="6466">confidence</wd>

<space/>

<wd l="7195" t="6360" r="7646" b="6466">score</wd>

<space/>

<wd l="7742" t="6360" r="7910" b="6466">as</wd>

<space/>

<wd l="8006" t="6307" r="8270" b="6466">the</wd>

<space/>

<wd l="8366" t="6307" r="9202" b="6466">canonical</wd>

<space/>

<wd l="9298" t="6307" r="9715" b="6466">form</wd>

<space/>

<wd l="9811" t="6307" r="10013" b="6466">of</wd>

<space/>

</run>

<wd l="10080" t="6336" r="10219" b="6494"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">i</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="10320" t="6317" r="10502" b="6461">In</wd>

<space/>

</run>

</ln>

<ln l="6139" t="6562" r="10498" b="6763" baseLine="6715" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="6614" r="6432" b="6720">our</wd>

<space/>

<wd l="6552" t="6562" r="7925" b="6763">implementation</wd>

<space/>

<wd l="8045" t="6562" r="8246" b="6720">of</wd>

<space/>

<wd l="8342" t="6562" r="8602" b="6720">the</wd>

<space/>

<wd l="8736" t="6586" r="9379" b="6763">system,</wd>

<space/>

<wd l="9509" t="6614" r="9758" b="6720">we</wd>

<space/>

<wd l="9883" t="6562" r="10286" b="6720">used</wd>

<space/>

<wd l="10406" t="6614" r="10498" b="6720">a</wd>

<space/>

</ln>

<ln l="6134" t="6816" r="10502" b="7018" baseLine="6970" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="6816" r="6806" b="6974">random</wd>

<space/>

<wd l="6888" t="6816" r="7387" b="6974">forest</wd>

<space/>

<wd l="7469" t="6816" r="8261" b="6974">classifier</wd>

<space/>

<wd l="8338" t="6816" r="9211" b="7018">(Breiman,</wd>

<space/>

<wd l="9298" t="6816" r="9806" b="7018">2001)</wd>

<space/>

<wd l="9888" t="6816" r="10502" b="7018">mainly</wd>

<space/>

</ln>

<ln l="6130" t="7070" r="10493" b="7272" baseLine="7224" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="7070" r="6826" b="7229">because</wd>

<space/>

<wd l="6931" t="7070" r="7123" b="7229">its</wd>

<space/>

<wd l="7229" t="7070" r="7910" b="7272">training</wd>

<space/>

<wd l="8016" t="7070" r="8510" b="7272">speed</wd>

<space/>

<wd l="8611" t="7070" r="8741" b="7229">is</wd>

<space/>

<wd l="8851" t="7070" r="9336" b="7229">faster</wd>

<space/>

<wd l="9432" t="7070" r="9749" b="7229">and</wd>

<space/>

<wd l="9845" t="7070" r="10042" b="7229">its</wd>

<space/>

<wd l="10142" t="7123" r="10493" b="7272">per-</wd>

</ln>

<ln l="6139" t="7320" r="10502" b="7522" baseLine="7474" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="7320" r="6960" b="7478">formance</wd>

<space/>

<wd l="7085" t="7320" r="7219" b="7478">is</wd>

<space/>

<wd l="7339" t="7320" r="8170" b="7522">relatively</wd>

<space/>

<wd l="8290" t="7320" r="9216" b="7478">insensitive</wd>

<space/>

<wd l="9336" t="7344" r="9499" b="7478">to</wd>

<space/>

<wd l="9614" t="7344" r="10502" b="7522">parameter</wd>

<space/>

</ln>

<ln l="6134" t="7574" r="10498" b="7776" baseLine="7728" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="7574" r="6739" b="7766">values,</wd>

<space/>

<wd l="6835" t="7574" r="7123" b="7733">but</wd>

<space/>

<wd l="7214" t="7574" r="7666" b="7733">other</wd>

<space/>

<wd l="7742" t="7574" r="8309" b="7776">binary</wd>

<space/>

<wd l="8405" t="7574" r="9557" b="7733">classification</wd>

<space/>

<wd l="9653" t="7574" r="10498" b="7776">algorithm</wd>

<space/>

</ln>

<ln l="6144" t="7829" r="7680" b="7987" baseLine="7982" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="7829" r="6725" b="7987">should</wd>

<space/>

<wd l="6782" t="7829" r="7123" b="7987">also</wd>

<space/>

<wd l="7186" t="7829" r="7680" b="7987">work.</wd>

</ln>

</para>

<para l="6134" t="8078" r="10493" b="8534" alignment="justified" li="72" ri="72" fli="288" lsp="exactly" lspExact="252" language="en">

<ln l="6365" t="8078" r="10493" b="8280" baseLine="8232" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6365" t="8078" r="6744" b="8237">This</wd>

<space/>

<wd l="6826" t="8102" r="7157" b="8280">step</wd>

<space/>

<wd l="7234" t="8078" r="7368" b="8237">is</wd>

<space/>

<wd l="7435" t="8078" r="8035" b="8280">mostly</wd>

<space/>

<wd l="8102" t="8078" r="8698" b="8237">feature</wd>

<space/>

<wd l="8770" t="8078" r="9802" b="8280">engineering</wd>

<space/>

<wd l="9869" t="8078" r="10186" b="8237">and</wd>

<space/>

<wd l="10243" t="8131" r="10493" b="8237">we</wd>

<space/>

</ln>

<ln l="6134" t="8333" r="8568" b="8534" baseLine="8486" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="8333" r="6538" b="8491">used</wd>

<space/>

<wd l="6595" t="8333" r="6854" b="8491">the</wd>

<space/>

<wd l="6922" t="8333" r="7766" b="8534">following</wd>

<space/>

<wd l="7834" t="8333" r="8568" b="8491">features:</wd>

</ln>

</para>

<para l="6437" t="8602" r="8798" b="8803" alignment="left" li="360" ri="72" spaceBefore="16" lsp="exactly" lspExact="253" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="6437" t="8602" r="8798" b="8803" baseLine="8755" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6437" t="8606" r="6725" b="8755">•</wd>

<tab position="6520"/>

<wd l="6725" t="8606" r="7411" b="8803">Support</wd>

<space/>

<wd l="7469" t="8602" r="7781" b="8760">and</wd>

<space/>

<wd l="7843" t="8602" r="8798" b="8760">confidence</wd>

</ln>

</para>

<para l="6418" t="8976" r="10522" b="13968" alignment="justified" li="360" ri="72" spaceBefore="121" lsp="exactly" lspExact="253" language="en">

<ln l="6422" t="8976" r="10502" b="9178" baseLine="9131">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="6422" t="8986" r="6725" b="9134">We</wd>

<space/>

<wd l="6811" t="8976" r="7584" b="9134">calculate</wd>

<space/>

<wd l="7666" t="8976" r="7930" b="9134">the</wd>

<space/>

<wd l="8021" t="9000" r="8674" b="9178">support</wd>

<space/>

<wd l="8755" t="8976" r="8957" b="9134">of</wd>

<space/>

<wd l="9010" t="8976" r="9499" b="9134">token</wd>

<space/>

</run>

<wd l="9581" t="9005" r="9677" b="9163"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">i</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="9758" t="8976" r="10502" b="9178">(number</wd>

<space/>

</run>

</ln>

<ln l="6427" t="9226" r="10498" b="9427" baseLine="9381">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="6427" t="9226" r="6629" b="9384">of</wd>

<space/>

<wd l="6672" t="9226" r="7138" b="9384">times</wd>

<space/>

</run>

<wd l="7219" t="9254" r="7310" b="9413"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">i</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="7382" t="9226" r="8117" b="9427">appears)</wd>

<space/>

<wd l="8194" t="9226" r="8506" b="9384">and</wd>

<space/>

<wd l="8573" t="9226" r="9528" b="9384">confidence</wd>

<space/>

<wd l="9605" t="9226" r="9806" b="9384">of</wd>

<space/>

<wd l="9850" t="9226" r="10334" b="9384">token</wd>

<space/>

</run>

<wd l="10406" t="9254" r="10498" b="9413"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">i</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><space/>

</run>

</ln>

<ln l="6418" t="9480" r="10493" b="9682" baseLine="9634">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="6418" t="9480" r="6907" b="9682">being</wd>

<space/>

<wd l="7003" t="9480" r="7997" b="9638">normalized</wd>

<space/>

<wd l="8088" t="9504" r="8251" b="9638">to</wd>

<space/>

<wd l="8357" t="9480" r="9187" b="9638">candidate</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="9288" t="9538" r="9379" b="9638">c</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="9480" t="9480" r="10493" b="9682">(percentage</wd>

<space/>

</run>

</ln>

<ln l="6427" t="9734" r="10493" b="9936" baseLine="9889">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="6427" t="9734" r="6629" b="9893">of</wd>

<space/>

<wd l="6706" t="9734" r="7176" b="9893">times</wd>

<space/>

</run>

<wd l="7286" t="9763" r="7382" b="9922"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">i</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="7488" t="9734" r="7622" b="9893">is</wd>

<space/>

<wd l="7733" t="9734" r="8722" b="9893">normalized</wd>

<space/>

<wd l="8818" t="9758" r="8981" b="9893">to</wd>

<space/>

</run>

<wd l="9096" t="9734" r="9254" b="9936"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">c</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">)</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><space/>

<wd l="9365" t="9734" r="10224" b="9936">according</wd>

<space/>

<wd l="10330" t="9758" r="10493" b="9893">to</wd>

<space/>

</run>

</ln>

<ln l="6422" t="9989" r="10502" b="10190" baseLine="10142" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6422" t="9989" r="7104" b="10190">training</wd>

<space/>

<wd l="7234" t="9989" r="7594" b="10147">data</wd>

<space/>

<wd l="7718" t="9989" r="8030" b="10147">and</wd>

<space/>

<wd l="8150" t="10042" r="8438" b="10147">use</wd>

<space/>

<wd l="8563" t="9989" r="9000" b="10147">them</wd>

<space/>

<wd l="9130" t="10042" r="9298" b="10147">as</wd>

<space/>

<wd l="9432" t="9989" r="10114" b="10147">features</wd>

<space/>

<wd l="10248" t="9989" r="10502" b="10147">for</wd>

<space/>

</ln>

<ln l="6427" t="10238" r="10493" b="10440" baseLine="10392" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6427" t="10238" r="7627" b="10397">classification.</wd>

<space/>

<wd l="7699" t="10248" r="8006" b="10397">For</wd>

<space/>

<wd l="8069" t="10238" r="8851" b="10440">example,</wd>

<space/>

<wd l="8928" t="10238" r="9091" b="10392">in</wd>

<space/>

<wd l="9149" t="10238" r="9413" b="10397">the</wd>

<space/>

<wd l="9475" t="10238" r="10157" b="10440">training</wd>

<space/>

<wd l="10224" t="10238" r="10493" b="10397">da-</wd>

</ln>

<ln l="6422" t="10493" r="10483" b="10694" baseLine="10646" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6422" t="10517" r="6581" b="10651">ta</wd>

<space/>

<wd l="6653" t="10493" r="7214" b="10651">shown</wd>

<space/>

<wd l="7277" t="10493" r="7843" b="10685">above,</wd>

<space/>

<wd l="7915" t="10493" r="8174" b="10651">the</wd>

<space/>

<wd l="8251" t="10517" r="8904" b="10694">support</wd>

<space/>

<wd l="8962" t="10493" r="9163" b="10651">of</wd>

<space/>

<wd l="9202" t="10493" r="9691" b="10651">token</wd>

<space/>

<wd l="9754" t="10498" r="10118" b="10651">“ur”</wd>

<space/>

<wd l="10190" t="10493" r="10325" b="10651">is</wd>

<space/>

<wd l="10397" t="10498" r="10483" b="10651">3</wd>

<space/>

</ln>

<ln l="6427" t="10747" r="10493" b="10949" baseLine="10901" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6427" t="10747" r="6744" b="10906">and</wd>

<space/>

<wd l="6864" t="10747" r="7123" b="10906">the</wd>

<space/>

<wd l="7258" t="10747" r="8213" b="10906">confidence</wd>

<space/>

<wd l="8342" t="10747" r="8544" b="10906">of</wd>

<space/>

<wd l="8645" t="10747" r="9706" b="10949">normalizing</wd>

<space/>

<wd l="9835" t="10752" r="10200" b="10906">“ur”</wd>

<space/>

<wd l="10330" t="10771" r="10493" b="10906">to</wd>

<space/>

</ln>

<ln l="6427" t="10997" r="10522" b="11198" baseLine="11150" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6427" t="11002" r="6854" b="11198">“you</wd>

<space/>

<wd l="6955" t="11002" r="7310" b="11155">are”</wd>

<space/>

<wd l="7421" t="10997" r="7555" b="11155">is</wd>

<space/>

<wd l="7666" t="10997" r="7925" b="11155">2/3</wd>

<space/>

<wd l="8045" t="11054" r="8165" b="11102">=</wd>

<space/>

<wd l="8275" t="11002" r="8698" b="11155">0.67.</wd>

<space/>

<wd l="8813" t="10997" r="9144" b="11155">The</wd>

<space/>

<wd l="9254" t="10997" r="10210" b="11155">confidence</wd>

<space/>

<wd l="10320" t="10997" r="10522" b="11155">of</wd>

<space/>

</ln>

<ln l="6422" t="11251" r="10522" b="11453" baseLine="11405" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6422" t="11251" r="7483" b="11453">normalizing</wd>

<space/>

<wd l="7584" t="11256" r="7949" b="11410">“ur”</wd>

<space/>

<wd l="8050" t="11275" r="8208" b="11410">to</wd>

<space/>

<wd l="8314" t="11256" r="8904" b="11453">“your”</wd>

<space/>

<wd l="9005" t="11251" r="9139" b="11410">is</wd>

<space/>

<wd l="9259" t="11251" r="9499" b="11410">1/3</wd>

<space/>

<wd l="9610" t="11309" r="9730" b="11357">=</wd>

<space/>

<wd l="9830" t="11256" r="10253" b="11410">0.33.</wd>

<space/>

<wd l="10358" t="11251" r="10522" b="11405">If</wd>

<space/>

</ln>

<ln l="6422" t="11506" r="10488" b="11707" baseLine="11660">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="6422" t="11506" r="6686" b="11664">the</wd>

<space/>

<wd l="6763" t="11506" r="7253" b="11664">token</wd>

<space/>

</run>

<wd l="7330" t="11534" r="7426" b="11693"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">i</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="7502" t="11506" r="7637" b="11664">is</wd>

<space/>

<wd l="7718" t="11506" r="8280" b="11664">absent</wd>

<space/>

<wd l="8352" t="11506" r="8520" b="11659">in</wd>

<space/>

<wd l="8592" t="11506" r="8856" b="11664">the</wd>

<space/>

<wd l="8933" t="11506" r="9614" b="11707">training</wd>

<space/>

<wd l="9696" t="11506" r="10099" b="11698">data,</wd>

<space/>

<wd l="10186" t="11558" r="10488" b="11707">e.g.</wd>

<space/>

</run>

</ln>

<ln l="6427" t="11755" r="10493" b="11957" baseLine="11909" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6427" t="11755" r="7262" b="11947">“looove”,</wd>

<space/>

<wd l="7334" t="11755" r="7714" b="11914">then</wd>

<space/>

<wd l="7776" t="11755" r="8035" b="11914">the</wd>

<space/>

<wd l="8117" t="11779" r="8765" b="11957">support</wd>

<space/>

<wd l="8832" t="11755" r="9144" b="11914">and</wd>

<space/>

<wd l="9211" t="11755" r="10162" b="11914">confidence</wd>

<space/>

<wd l="10238" t="11808" r="10493" b="11914">are</wd>

<space/>

</ln>

<ln l="6418" t="12010" r="10493" b="12211" baseLine="12164">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="6418" t="12010" r="6816" b="12168">both</wd>

<space/>

<wd l="6917" t="12062" r="7339" b="12168">zero.</wd>

<space/>

<wd l="7459" t="12010" r="7622" b="12163">If</wd>

<space/>

<wd l="7699" t="12010" r="7963" b="12168">the</wd>

<space/>

<wd l="8074" t="12010" r="8563" b="12168">token</wd>

<space/>

</run>

<wd l="8664" t="12038" r="8760" b="12197"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">i</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="8866" t="12010" r="9000" b="12168">is</wd>

<space/>

<wd l="9106" t="12034" r="9749" b="12211">present</wd>

<space/>

<wd l="9845" t="12010" r="10133" b="12168">but</wd>

<space/>

<wd l="10229" t="12010" r="10493" b="12168">the</wd>

<space/>

</run>

</ln>

<ln l="6422" t="12264" r="10498" b="12466" baseLine="12419">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="6422" t="12264" r="7646" b="12422">normalization</wd>

<space/>

<wd l="7714" t="12264" r="8131" b="12422">from</wd>

<space/>

</run>

<wd l="8203" t="12293" r="8294" b="12451"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">i</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="8357" t="12288" r="8520" b="12422">to</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="8597" t="12322" r="8688" b="12422">c</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="8755" t="12264" r="8890" b="12422">is</wd>

<space/>

<wd l="8966" t="12264" r="9523" b="12422">absent</wd>

<space/>

<wd l="9586" t="12264" r="9754" b="12418">in</wd>

<space/>

<wd l="9816" t="12264" r="10498" b="12466">training</wd>

<space/>

</run>

</ln>

<ln l="6427" t="12518" r="10493" b="12720" baseLine="12672" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6427" t="12518" r="6835" b="12710">data,</wd>

<space/>

<wd l="6922" t="12518" r="7301" b="12677">then</wd>

<space/>

<wd l="7387" t="12518" r="7771" b="12720">only</wd>

<space/>

<wd l="7848" t="12518" r="8112" b="12677">the</wd>

<space/>

<wd l="8198" t="12518" r="9154" b="12677">confidence</wd>

<space/>

<wd l="9245" t="12518" r="9374" b="12677">is</wd>

<space/>

<wd l="9466" t="12571" r="9883" b="12677">zero.</wd>

<space/>

<wd l="9979" t="12518" r="10493" b="12677">These</wd>

<space/>

</ln>

<ln l="6427" t="12768" r="10493" b="12926" baseLine="12922" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6427" t="12768" r="7114" b="12926">features</wd>

<space/>

<wd l="7219" t="12821" r="7474" b="12926">are</wd>

<space/>

<wd l="7579" t="12792" r="8222" b="12926">context</wd>

<space/>

<wd l="8318" t="12768" r="8650" b="12926">free</wd>

<space/>

<wd l="8750" t="12768" r="9062" b="12926">and</wd>

<space/>

<wd l="9154" t="12768" r="9418" b="12926">the</wd>

<space/>

<wd l="9518" t="12768" r="10262" b="12926">intuition</wd>

<space/>

<wd l="10358" t="12768" r="10493" b="12926">is</wd>

<space/>

</ln>

<ln l="6422" t="13022" r="10493" b="13224" baseLine="13176" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6422" t="13022" r="6754" b="13181">that</wd>

<space/>

<wd l="6816" t="13022" r="7080" b="13181">the</wd>

<space/>

<wd l="7147" t="13022" r="7714" b="13224">higher</wd>

<space/>

<wd l="7771" t="13022" r="8035" b="13181">the</wd>

<space/>

<wd l="8112" t="13046" r="8765" b="13224">support</wd>

<space/>

<wd l="8827" t="13022" r="9144" b="13181">and</wd>

<space/>

<wd l="9211" t="13022" r="10162" b="13181">confidence</wd>

<space/>

<wd l="10238" t="13075" r="10493" b="13181">are</wd>

<space/>

</ln>

<ln l="6427" t="13277" r="10498" b="13478" baseLine="13430" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6427" t="13277" r="6888" b="13478">(high</wd>

<space/>

<wd l="7003" t="13301" r="7656" b="13478">support</wd>

<space/>

<wd l="7766" t="13277" r="7896" b="13435">is</wd>

<space/>

<wd l="8011" t="13330" r="8866" b="13478">necessary</wd>

<space/>

<wd l="8976" t="13277" r="9144" b="13430">in</wd>

<space/>

<wd l="9254" t="13330" r="9624" b="13435">case</wd>

<space/>

<wd l="9739" t="13277" r="9941" b="13435">of</wd>

<space/>

<wd l="10032" t="13277" r="10498" b="13435">small</wd>

<space/>

</ln>

<ln l="6432" t="13526" r="10502" b="13728" baseLine="13680">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="6432" t="13526" r="7162" b="13728">sample),</wd>

<space/>

<wd l="7258" t="13526" r="7517" b="13685">the</wd>

<space/>

<wd l="7608" t="13579" r="8054" b="13685">more</wd>

<space/>

<wd l="8146" t="13526" r="8640" b="13728">likely</wd>

<space/>

<wd l="8722" t="13526" r="9053" b="13685">that</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="9139" t="13584" r="9230" b="13685">c</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="9317" t="13526" r="9451" b="13685">is</wd>

<space/>

<wd l="9538" t="13526" r="9802" b="13685">the</wd>

<space/>

<wd l="9893" t="13550" r="10502" b="13685">correct</wd>

<space/>

</run>

</ln>

<ln l="6427" t="13781" r="8184" b="13968" baseLine="13935">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="6427" t="13781" r="7262" b="13939">canonical</wd>

<space/>

<wd l="7325" t="13781" r="7747" b="13939">form</wd>

<space/>

<wd l="7810" t="13781" r="8011" b="13939">of</wd>

<space/>

</run>

<wd l="8050" t="13810" r="8184" b="13968"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">i</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><nl orig="true"/>

</run>

</ln>

</para>

<para l="6437" t="14170" r="8347" b="14371" alignment="left" li="360" ri="72" spaceBefore="134" lsp="exactly" lspExact="253" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="6437" t="14170" r="8347" b="14371" baseLine="14323" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6437" t="14174" r="6725" b="14323">•</wd>

<tab position="6520"/>

<wd l="6725" t="14170" r="7248" b="14371">String</wd>

<space/>

<wd l="7310" t="14170" r="8347" b="14328">information</wd>

</ln>

</para>

<para l="6422" t="14544" r="10502" b="15206" alignment="justified" li="360" ri="72" spaceBefore="121" lsp="exactly" lspExact="249" language="en">

<ln l="6422" t="14544" r="10493" b="14746" baseLine="14698" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6422" t="14554" r="6725" b="14702">We</wd>

<space/>

<wd l="6816" t="14544" r="7584" b="14702">calculate</wd>

<space/>

<wd l="7675" t="14544" r="7934" b="14702">the</wd>

<space/>

<wd l="8030" t="14544" r="8520" b="14746">string</wd>

<space/>

<wd l="8616" t="14544" r="9446" b="14746">similarity</wd>

<space/>

<wd l="9538" t="14597" r="9989" b="14702">score</wd>

<space/>

<wd l="10075" t="14544" r="10493" b="14746">(Jac-</wd>

</ln>

<ln l="6427" t="14798" r="10498" b="15000" baseLine="14953">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6427" t="14798" r="6802" b="14957">card</wd>

<space/>

<wd l="6902" t="14798" r="7397" b="14957">Index</wd>

<space/>

<wd l="7502" t="14798" r="7704" b="14957">of</wd>

<space/>

<wd l="7781" t="14798" r="8381" b="14957">feature</wd>

<space/>

<wd l="8496" t="14798" r="8880" b="15000">sets)</wd>

<space/>

<wd l="8981" t="14798" r="9720" b="14957">between</wd>

<space/>

<wd l="9816" t="14798" r="10306" b="14957">token</wd>

<space/>

</run>

<wd l="10406" t="14827" r="10498" b="14986"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">i</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

</ln>

<ln l="6427" t="15048" r="10502" b="15206" baseLine="15202">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6427" t="15048" r="6744" b="15206">and</wd>

<space/>

<wd l="6850" t="15048" r="7680" b="15206">candidate</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7795" t="15106" r="7886" b="15206">c</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="8002" t="15048" r="8314" b="15206">and</wd>

<space/>

<wd l="8414" t="15101" r="8702" b="15206">use</wd>

<space/>

<wd l="8818" t="15048" r="8938" b="15206">it</wd>

<space/>

<wd l="9043" t="15101" r="9216" b="15206">as</wd>

<space/>

<wd l="9331" t="15101" r="9422" b="15206">a</wd>

<space/>

<wd l="9533" t="15048" r="10133" b="15206">feature</wd>

<space/>

<wd l="10248" t="15048" r="10502" b="15206">for</wd>

</run>

</ln>

</para>

</column>

</section>

<dd l="5776" t="15746" r="6176" b="15975">

<para l="5809" t="15792" r="6143" b="15946" alignment="left" lsp="exactly" lspExact="223" language="en">

<ln l="5875" t="15792" r="6077" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="950" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="28">

<wd l="5875" t="15792" r="6077" b="15946">89</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4313.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1323" marginTop="1430" marginRight="1283" marginBottom="1302" offsetX="-6" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1323" t="1430" r="10626" b="15439">

<column l="1323" t="1430" r="5917" b="15071">

<para l="1709" t="1474" r="5794" b="3955" alignment="justified" li="360" ri="72" lsp="exactly" lspExact="253" language="en">

<ln l="1718" t="1474" r="5794" b="1675" baseLine="1627" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1718" t="1474" r="2918" b="1632">classification.</wd>

<space/>

<wd l="2995" t="1474" r="3518" b="1675">String</wd>

<space/>

<wd l="3590" t="1474" r="4421" b="1675">similarity</wd>

<space/>

<wd l="4488" t="1526" r="4934" b="1632">score</wd>

<space/>

<wd l="5002" t="1474" r="5136" b="1632">is</wd>

<space/>

<wd l="5203" t="1526" r="5294" b="1632">a</wd>

<space/>

<wd l="5357" t="1474" r="5794" b="1675">good</wd>

<space/>

</ln>

<ln l="1718" t="1728" r="5784" b="1886" baseLine="1882" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1718" t="1728" r="2318" b="1886">feature</wd>

<space/>

<wd l="2424" t="1728" r="2683" b="1886">for</wd>

<space/>

<wd l="2779" t="1728" r="3658" b="1886">difference</wd>

<space/>

<wd l="3754" t="1728" r="4493" b="1886">between</wd>

<space/>

<wd l="4589" t="1728" r="5078" b="1886">token</wd>

<space/>

<wd l="5174" t="1728" r="5491" b="1886">and</wd>

<space/>

<wd l="5587" t="1728" r="5784" b="1886">its</wd>

<space/>

</ln>

<ln l="1718" t="1982" r="5784" b="2184" baseLine="2136" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1718" t="1982" r="2554" b="2141">canonical</wd>

<space/>

<wd l="2626" t="1982" r="3043" b="2141">form</wd>

<space/>

<wd l="3110" t="1982" r="3706" b="2141">caused</wd>

<space/>

<wd l="3758" t="1982" r="3984" b="2184">by</wd>

<space/>

<wd l="4046" t="1982" r="5054" b="2184">misspelling</wd>

<space/>

<wd l="5126" t="1982" r="5453" b="2184">(for</wd>

<space/>

<wd l="5515" t="2035" r="5784" b="2141">ex-</wd>

</ln>

<ln l="1718" t="2232" r="5794" b="2438" baseLine="2390">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1718" t="2237" r="2294" b="2438">ample,</wd>

<space/>

<wd l="2381" t="2242" r="3283" b="2438">“seperate”</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><wd l="3374" t="2232" r="3557" b="2390">→</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3648" t="2237" r="4675" b="2438">“separate”),</wd>

<space/>

<wd l="4752" t="2237" r="5040" b="2395">but</wd>

<space/>

<wd l="5112" t="2237" r="5227" b="2395">it</wd>

<space/>

<wd l="5299" t="2237" r="5434" b="2395">is</wd>

<space/>

<wd l="5510" t="2261" r="5794" b="2395">not</wd>

<space/>

</run>

</ln>

<ln l="1718" t="2486" r="5784" b="2688" baseLine="2640" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1718" t="2539" r="1810" b="2645">a</wd>

<space/>

<wd l="1915" t="2486" r="2352" b="2688">good</wd>

<space/>

<wd l="2453" t="2486" r="3053" b="2645">feature</wd>

<space/>

<wd l="3158" t="2486" r="3418" b="2645">for</wd>

<space/>

<wd l="3514" t="2486" r="4392" b="2645">difference</wd>

<space/>

<wd l="4502" t="2486" r="5098" b="2645">caused</wd>

<space/>

<wd l="5189" t="2486" r="5414" b="2688">by</wd>

<space/>

<wd l="5515" t="2486" r="5784" b="2645">ab-</wd>

</ln>

<ln l="1709" t="2736" r="5789" b="2942" baseLine="2894">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1709" t="2741" r="2606" b="2899">breviation</wd>

<space/>

<wd l="2722" t="2741" r="3048" b="2942">(for</wd>

<space/>

<wd l="3158" t="2741" r="3946" b="2942">example,</wd>

<space/>

<wd l="4070" t="2741" r="4483" b="2899">“lol”</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><wd l="4613" t="2736" r="4795" b="2894">→</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4930" t="2741" r="5789" b="2942">“laughing</wd>

<space/>

</run>

</ln>

<ln l="1718" t="2995" r="5789" b="3197" baseLine="3149" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1718" t="3019" r="1997" b="3154">out</wd>

<space/>

<wd l="2141" t="2995" r="2741" b="3197">loud”).</wd>

<space/>

<wd l="2894" t="2995" r="3802" b="3187">Therefore,</wd>

<space/>

<wd l="3950" t="3048" r="4200" b="3154">we</wd>

<space/>

<wd l="4349" t="2995" r="4690" b="3154">also</wd>

<space/>

<wd l="4838" t="2995" r="5155" b="3154">add</wd>

<space/>

<wd l="5299" t="2995" r="5789" b="3197">string</wd>

<space/>

</ln>

<ln l="1718" t="3245" r="5794" b="3446" baseLine="3398" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1718" t="3245" r="2266" b="3446">length</wd>

<space/>

<wd l="2328" t="3245" r="2645" b="3403">and</wd>

<space/>

<wd l="2707" t="3245" r="3590" b="3403">difference</wd>

<space/>

<wd l="3662" t="3245" r="3826" b="3398">in</wd>

<space/>

<wd l="3898" t="3245" r="4382" b="3446">string</wd>

<space/>

<wd l="4454" t="3245" r="4997" b="3446">length</wd>

<space/>

<wd l="5054" t="3245" r="5794" b="3403">between</wd>

<space/>

</ln>

<ln l="1718" t="3499" r="5789" b="3701" baseLine="3654">

<wd l="1718" t="3528" r="1814" b="3686"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">i</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1886" t="3499" r="2198" b="3658">and</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2270" t="3557" r="2362" b="3658">c</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2438" t="3552" r="2616" b="3658">so</wd>

<space/>

<wd l="2688" t="3499" r="3019" b="3658">that</wd>

<space/>

<wd l="3091" t="3499" r="3883" b="3658">classifier</wd>

<space/>

<wd l="3950" t="3552" r="4253" b="3658">can</wd>

<space/>

<wd l="4320" t="3499" r="4920" b="3658">choose</wd>

<space/>

<wd l="4992" t="3523" r="5155" b="3658">to</wd>

<space/>

<wd l="5237" t="3499" r="5789" b="3701">ignore</wd>

<space/>

</run>

</ln>

<ln l="1723" t="3754" r="5122" b="3955" baseLine="3907" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1723" t="3754" r="2213" b="3955">string</wd>

<space/>

<wd l="2280" t="3754" r="3115" b="3955">similarity</wd>

<space/>

<wd l="3182" t="3806" r="3629" b="3912">score</wd>

<space/>

<wd l="3691" t="3754" r="4166" b="3912">when</wd>

<space/>

<wd l="4219" t="3806" r="5122" b="3955">necessary.</wd>

</ln>

</para>

<para l="1714" t="4123" r="5794" b="4536" alignment="justified" li="360" ri="72" spaceBefore="116" lsp="exactly" lspExact="254" language="en">

<ln l="1714" t="4123" r="5794" b="4325" baseLine="4277" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="2">

<wd l="1714" t="4123" r="1992" b="4277">All</wd>

<space/>

<wd l="2136" t="4123" r="2621" b="4325">string</wd>

<space/>

<wd l="2760" t="4123" r="3792" b="4282">information</wd>

<space/>

<wd l="3926" t="4123" r="4608" b="4282">features</wd>

<space/>

<wd l="4752" t="4176" r="5011" b="4282">are</wd>

<space/>

<wd l="5150" t="4147" r="5794" b="4282">context</wd>

<space/>

</ln>

<ln l="1718" t="4378" r="2102" b="4536" baseLine="4531" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="2">

<wd l="1718" t="4378" r="2102" b="4536">free.</wd>

</ln>

</para>

<para l="1728" t="4766" r="4219" b="4968" alignment="left" li="360" spaceBefore="135" lsp="exactly" lspExact="254" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="1728" t="4766" r="4219" b="4968" baseLine="4920" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1728" t="4776" r="2006" b="4920">•</wd>

<tab position="1809"/>

<wd l="2006" t="4771" r="2395" b="4925">POS</wd>

<space/>

<wd l="2462" t="4766" r="3120" b="4968">tagging</wd>

<space/>

<wd l="3182" t="4766" r="4219" b="4925">information</wd>

</ln>

</para>

<para l="1714" t="5141" r="5813" b="8587" alignment="justified" li="360" ri="72" spaceBefore="106" lsp="exactly" lspExact="254" language="en">

<ln l="1718" t="5141" r="5794" b="5299" baseLine="5294" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1718" t="5146" r="2074" b="5299">One</wd>

<space/>

<wd l="2170" t="5141" r="2366" b="5299">of</wd>

<space/>

<wd l="2429" t="5141" r="2688" b="5299">the</wd>

<space/>

<wd l="2774" t="5141" r="3806" b="5299">motivations</wd>

<space/>

<wd l="3902" t="5141" r="4104" b="5299">of</wd>

<space/>

<wd l="4162" t="5165" r="4493" b="5299">text</wd>

<space/>

<wd l="4570" t="5141" r="5794" b="5299">normalization</wd>

<space/>

</ln>

<ln l="1718" t="5395" r="5784" b="5597" baseLine="5549" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1718" t="5395" r="1853" b="5554">is</wd>

<space/>

<wd l="1934" t="5419" r="2098" b="5554">to</wd>

<space/>

<wd l="2184" t="5395" r="2942" b="5554">facilitate</wd>

<space/>

<wd l="3034" t="5395" r="4003" b="5597">subsequent</wd>

<space/>

<wd l="4075" t="5395" r="4555" b="5587">tasks,</wd>

<space/>

<wd l="4651" t="5395" r="5045" b="5554">such</wd>

<space/>

<wd l="5122" t="5448" r="5294" b="5554">as</wd>

<space/>

<wd l="5371" t="5419" r="5784" b="5597">part-</wd>

</ln>

<ln l="1718" t="5645" r="5784" b="5846" baseLine="5798" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1718" t="5645" r="2573" b="5846">of-speech</wd>

<space/>

<wd l="2654" t="5645" r="3312" b="5846">tagging</wd>

<space/>

<wd l="3403" t="5645" r="3720" b="5803">and</wd>

<space/>

<wd l="3802" t="5645" r="4387" b="5803">named</wd>

<space/>

<wd l="4478" t="5645" r="4978" b="5846">entity</wd>

<space/>

<wd l="5059" t="5645" r="5784" b="5846">recogni-</wd>

</ln>

<ln l="1714" t="5899" r="5794" b="6101" baseLine="6053" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1714" t="5899" r="2102" b="6058">tion.</wd>

<space/>

<wd l="2309" t="5899" r="3216" b="6091">Therefore,</wd>

<space/>

<wd l="3422" t="5899" r="3859" b="6101">good</wd>

<space/>

<wd l="4046" t="5923" r="4378" b="6058">text</wd>

<space/>

<wd l="4570" t="5899" r="5794" b="6058">normalization</wd>

<space/>

</ln>

<ln l="1723" t="6154" r="5784" b="6355" baseLine="6307" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1723" t="6154" r="2304" b="6312">should</wd>

<space/>

<wd l="2376" t="6154" r="2846" b="6312">make</wd>

<space/>

<wd l="2928" t="6154" r="3192" b="6312">the</wd>

<space/>

<wd l="3283" t="6154" r="4253" b="6355">subsequent</wd>

<space/>

<wd l="4330" t="6154" r="4762" b="6312">tasks</wd>

<space/>

<wd l="4848" t="6154" r="5400" b="6312">easier.</wd>

<space/>

<wd l="5486" t="6163" r="5784" b="6312">We</wd>

<space/>

</ln>

<ln l="1718" t="6403" r="5813" b="6605" baseLine="6557" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1718" t="6403" r="2510" b="6562">observed</wd>

<space/>

<wd l="2592" t="6403" r="2923" b="6562">that</wd>

<space/>

<wd l="3005" t="6403" r="3173" b="6557">in</wd>

<space/>

<wd l="3254" t="6403" r="3518" b="6562">the</wd>

<space/>

<wd l="3605" t="6403" r="4282" b="6605">training</wd>

<space/>

<wd l="4373" t="6403" r="4781" b="6595">data,</wd>

<space/>

<wd l="4877" t="6403" r="5045" b="6557">in</wd>

<space/>

<wd l="5126" t="6408" r="5515" b="6566">90%</wd>

<space/>

<wd l="5611" t="6403" r="5813" b="6562">of</wd>

<space/>

</ln>

<ln l="1714" t="6658" r="5784" b="6816" baseLine="6811" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1714" t="6658" r="1978" b="6816">the</wd>

<space/>

<wd l="2069" t="6710" r="2520" b="6816">cases</wd>

<space/>

<wd l="2606" t="6658" r="3134" b="6816">where</wd>

<space/>

<wd l="3226" t="6710" r="3317" b="6816">a</wd>

<space/>

<wd l="3398" t="6658" r="3888" b="6816">token</wd>

<space/>

<wd l="3970" t="6658" r="4104" b="6816">is</wd>

<space/>

<wd l="4190" t="6658" r="5179" b="6816">normalized</wd>

<space/>

<wd l="5261" t="6682" r="5419" b="6816">to</wd>

<space/>

<wd l="5515" t="6710" r="5784" b="6816">an-</wd>

</ln>

<ln l="1718" t="6912" r="5794" b="7114" baseLine="7066" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1718" t="6912" r="2170" b="7070">other</wd>

<space/>

<wd l="2280" t="6912" r="2813" b="7104">token,</wd>

<space/>

<wd l="2942" t="6912" r="3202" b="7070">the</wd>

<space/>

<wd l="3326" t="6912" r="4162" b="7070">canonical</wd>

<space/>

<wd l="4286" t="6912" r="4704" b="7070">form</wd>

<space/>

<wd l="4824" t="6912" r="5107" b="7070">has</wd>

<space/>

<wd l="5227" t="6912" r="5794" b="7114">higher</wd>

<space/>

</ln>

<ln l="1714" t="7162" r="5798" b="7363" baseLine="7315">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1714" t="7166" r="2107" b="7320">POS</wd>

<space/>

<wd l="2237" t="7162" r="2894" b="7363">tagging</wd>

<space/>

<wd l="3019" t="7162" r="4022" b="7354">confidence,</wd>

<space/>

<wd l="4147" t="7162" r="4656" b="7320">based</wd>

<space/>

<wd l="4776" t="7214" r="4992" b="7320">on</wd>

<space/>

<wd l="5112" t="7162" r="5376" b="7320">the</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="5501" t="7162" r="5798" b="7320">ark</wd>

<space/>

</run>

</ln>

<ln l="1714" t="7416" r="5784" b="7618" baseLine="7570" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1714" t="7421" r="2107" b="7574">POS</wd>

<space/>

<wd l="2227" t="7440" r="2779" b="7618">tagger</wd>

<space/>

<wd l="2885" t="7416" r="3610" b="7618">(Gimpel</wd>

<space/>

<wd l="3725" t="7440" r="3883" b="7574">et</wd>

<space/>

<wd l="3994" t="7416" r="4243" b="7608">al.,</wd>

<space/>

<wd l="4363" t="7416" r="4920" b="7618">2011),</wd>

<space/>

<wd l="5035" t="7416" r="5410" b="7574">than</wd>

<space/>

<wd l="5525" t="7416" r="5784" b="7574">the</wd>

<space/>

</ln>

<ln l="1718" t="7670" r="5779" b="7872" baseLine="7824" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1718" t="7670" r="2443" b="7872">original.</wd>

<space/>

<wd l="2578" t="7670" r="3437" b="7829">Therefore</wd>

<space/>

<wd l="3562" t="7723" r="3811" b="7829">we</wd>

<space/>

<wd l="3941" t="7723" r="4229" b="7829">use</wd>

<space/>

<wd l="4358" t="7670" r="4968" b="7872">change</wd>

<space/>

<wd l="5102" t="7670" r="5270" b="7824">in</wd>

<space/>

<wd l="5390" t="7675" r="5779" b="7829">POS</wd>

<space/>

</ln>

<ln l="1714" t="7925" r="5784" b="8126" baseLine="8078">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1714" t="7925" r="2371" b="8126">tagging</wd>

<space/>

<wd l="2438" t="7925" r="3394" b="8083">confidence</wd>

<space/>

<wd l="3461" t="7949" r="3619" b="8083">at</wd>

<space/>

<wd l="3672" t="7925" r="4387" b="8126">position</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4450" t="7939" r="4507" b="8083">i</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4570" t="7925" r="4738" b="8078">in</wd>

<space/>

<wd l="4795" t="7949" r="5270" b="8083">tweet</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="5342" t="7934" r="5472" b="8078">T</wd>

<space/>

</run>

<wd l="5506" t="7925" r="5784" b="8083" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">be-</wd>

</ln>

<ln l="1718" t="8174" r="5784" b="8376" baseLine="8329">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1718" t="8174" r="2064" b="8333">fore</wd>

<space/>

<wd l="2150" t="8174" r="2462" b="8333">and</wd>

<space/>

<wd l="2544" t="8174" r="2942" b="8333">after</wd>

<space/>

<wd l="3014" t="8174" r="4075" b="8376">normalizing</wd>

<space/>

</run>

<wd l="4162" t="8203" r="4253" b="8362"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">i</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4330" t="8198" r="4493" b="8333">to</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4579" t="8232" r="4670" b="8333">c</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4752" t="8227" r="4925" b="8333">as</wd>

<space/>

<wd l="5011" t="8227" r="5102" b="8333">a</wd>

<space/>

<wd l="5184" t="8174" r="5784" b="8333">feature</wd>

<space/>

</run>

</ln>

<ln l="1718" t="8429" r="3230" b="8587" baseLine="8582" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1718" t="8429" r="1978" b="8587">for</wd>

<space/>

<wd l="2030" t="8429" r="3230" b="8587">classification.</wd>

</ln>

</para>

<para l="1714" t="8803" r="5789" b="11026" alignment="justified" li="360" ri="72" spaceBefore="110" lsp="exactly" lspExact="254" language="en">

<ln l="1714" t="8803" r="5789" b="9005" baseLine="8957" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1714" t="8813" r="2016" b="8962">We</wd>

<space/>

<wd l="2088" t="8803" r="2429" b="8962">also</wd>

<space/>

<wd l="2506" t="8803" r="3139" b="8962">include</wd>

<space/>

<wd l="3216" t="8803" r="3826" b="9005">change</wd>

<space/>

<wd l="3898" t="8803" r="4066" b="8957">in</wd>

<space/>

<wd l="4128" t="8856" r="4598" b="8962">mean</wd>

<space/>

<wd l="4666" t="8808" r="5059" b="8962">POS</wd>

<space/>

<wd l="5131" t="8803" r="5789" b="9005">tagging</wd>

<space/>

</ln>

<ln l="1718" t="9053" r="5784" b="9254" baseLine="9206">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1718" t="9053" r="2674" b="9211">confidence</wd>

<space/>

<wd l="2774" t="9053" r="2938" b="9206">in</wd>

<space/>

<wd l="3024" t="9077" r="3504" b="9211">tweet</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3605" t="9062" r="3734" b="9206">T</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3792" t="9053" r="4488" b="9211">because</wd>

<space/>

<wd l="4584" t="9053" r="5381" b="9254">changing</wd>

<space/>

<wd l="5477" t="9106" r="5784" b="9211">one</wd>

<space/>

</run>

</ln>

<ln l="1714" t="9307" r="5784" b="9509" baseLine="9461" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1714" t="9307" r="2203" b="9466">token</wd>

<space/>

<wd l="2328" t="9360" r="2621" b="9466">can</wd>

<space/>

<wd l="2750" t="9307" r="3245" b="9466">affect</wd>

<space/>

<wd l="3360" t="9307" r="3624" b="9466">the</wd>

<space/>

<wd l="3754" t="9307" r="4704" b="9466">confidence</wd>

<space/>

<wd l="4834" t="9307" r="5035" b="9466">of</wd>

<space/>

<wd l="5131" t="9307" r="5784" b="9509">tagging</wd>

<space/>

</ln>

<ln l="1718" t="9562" r="5779" b="9763" baseLine="9715" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1718" t="9562" r="2170" b="9720">other</wd>

<space/>

<wd l="2270" t="9562" r="2890" b="9720">tokens.</wd>

<space/>

<wd l="3010" t="9571" r="3187" b="9715">In</wd>

<space/>

<wd l="3293" t="9562" r="4013" b="9720">addition</wd>

<space/>

<wd l="4114" t="9586" r="4277" b="9720">to</wd>

<space/>

<wd l="4392" t="9562" r="5006" b="9763">change</wd>

<space/>

<wd l="5117" t="9562" r="5285" b="9715">in</wd>

<space/>

<wd l="5390" t="9566" r="5779" b="9720">POS</wd>

<space/>

</ln>

<ln l="1714" t="9816" r="5784" b="10018" baseLine="9970" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1714" t="9816" r="2371" b="10018">tagging</wd>

<space/>

<wd l="2472" t="9816" r="3480" b="10008">confidence,</wd>

<space/>

<wd l="3586" t="9869" r="3835" b="9974">we</wd>

<space/>

<wd l="3936" t="9869" r="4219" b="9974">use</wd>

<space/>

<wd l="4325" t="9821" r="4714" b="9974">POS</wd>

<space/>

<wd l="4819" t="9840" r="5165" b="10018">tags</wd>

<space/>

<wd l="5275" t="9816" r="5477" b="9974">of</wd>

<space/>

<wd l="5549" t="9840" r="5784" b="9974">to-</wd>

</ln>

<ln l="1714" t="10066" r="5784" b="10267" baseLine="10223">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1714" t="10066" r="2112" b="10224">kens</wd>

<space/>

</run>

<wd l="2189" t="10094" r="2395" b="10253"><run italic="true" underlined="none" subsuperscript="subscript" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">t</run>

<run italic="true" underlined="none" subsuperscript="subscript" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">i-1</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="2477" t="10066" r="2789" b="10224">and</wd>

<space/>

</run>

<wd l="2861" t="10094" r="2952" b="10253"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">i</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3029" t="10118" r="3197" b="10224">as</wd>

<space/>

<wd l="3278" t="10066" r="3960" b="10224">features</wd>

<space/>

<wd l="4042" t="10066" r="4373" b="10267">(tag</wd>

<space/>

<wd l="4450" t="10066" r="4584" b="10224">is</wd>

<space/>

<wd l="4666" t="10090" r="5208" b="10267">empty</wd>

<space/>

<wd l="5280" t="10066" r="5434" b="10219">if</wd>

<space/>

</run>

<wd l="5482" t="10094" r="5578" b="10253"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">i</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="5650" t="10066" r="5784" b="10224">is</wd>

<space/>

</run>

</ln>

<ln l="1714" t="10320" r="5784" b="10522" baseLine="10474" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1714" t="10320" r="1978" b="10478">the</wd>

<space/>

<wd l="2069" t="10320" r="2419" b="10478">first</wd>

<space/>

<wd l="2496" t="10320" r="3048" b="10522">token)</wd>

<space/>

<wd l="3130" t="10320" r="3826" b="10478">because</wd>

<space/>

<wd l="3907" t="10320" r="4344" b="10478">there</wd>

<space/>

<wd l="4430" t="10373" r="4733" b="10478">can</wd>

<space/>

<wd l="4805" t="10320" r="5011" b="10478">be</wd>

<space/>

<wd l="5088" t="10344" r="5784" b="10522">patterns</wd>

<space/>

</ln>

<ln l="1718" t="10574" r="5784" b="10776" baseLine="10728" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1718" t="10574" r="1920" b="10733">of</wd>

<space/>

<wd l="2006" t="10574" r="3034" b="10733">consecutive</wd>

<space/>

<wd l="3144" t="10579" r="3533" b="10733">POS</wd>

<space/>

<wd l="3648" t="10598" r="3994" b="10776">tags</wd>

<space/>

<wd l="4114" t="10574" r="4426" b="10733">and</wd>

<space/>

<wd l="4536" t="10627" r="4987" b="10733">some</wd>

<space/>

<wd l="5088" t="10598" r="5784" b="10776">patterns</wd>

<space/>

</ln>

<ln l="1718" t="10824" r="4891" b="11026" baseLine="10978" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1718" t="10877" r="1978" b="10982">are</wd>

<space/>

<wd l="2040" t="10824" r="2530" b="10982">much</wd>

<space/>

<wd l="2582" t="10877" r="3029" b="10982">more</wd>

<space/>

<wd l="3096" t="10824" r="3826" b="11026">frequent</wd>

<space/>

<wd l="3878" t="10824" r="4258" b="10982">than</wd>

<space/>

<wd l="4315" t="10824" r="4891" b="10982">others.</wd>

</ln>

</para>

<para l="1714" t="11198" r="5784" b="11611" alignment="justified" li="360" ri="72" spaceBefore="120" lsp="exactly" lspExact="254" language="en">

<ln l="1714" t="11198" r="5784" b="11400" baseLine="11352" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1714" t="11198" r="1992" b="11352">All</wd>

<space/>

<wd l="2098" t="11203" r="2486" b="11357">POS</wd>

<space/>

<wd l="2597" t="11198" r="3254" b="11400">tagging</wd>

<space/>

<wd l="3360" t="11198" r="4046" b="11357">features</wd>

<space/>

<wd l="4152" t="11251" r="4440" b="11357">use</wd>

<space/>

<wd l="4550" t="11222" r="5194" b="11357">context</wd>

<space/>

<wd l="5294" t="11198" r="5784" b="11357">infor-</wd>

</ln>

<ln l="1714" t="11453" r="2371" b="11611" baseLine="11606" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1714" t="11453" r="2371" b="11611">mation.</wd>

</ln>

</para>

<para l="1430" t="11822" r="5784" b="12235" alignment="justified" li="72" ri="72" spaceBefore="116" fli="288" lsp="exactly" lspExact="251" language="en">

<ln l="1656" t="11822" r="5784" b="12024" baseLine="11976" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="11822" r="1987" b="11981">The</wd>

<space/>

<wd l="2064" t="11822" r="3043" b="12024">importance</wd>

<space/>

<wd l="3115" t="11822" r="3317" b="11981">of</wd>

<space/>

<wd l="3355" t="11822" r="3802" b="11981">these</wd>

<space/>

<wd l="3878" t="11822" r="5030" b="11981">classification</wd>

<space/>

<wd l="5098" t="11822" r="5784" b="11981">features</wd>

<space/>

</ln>

<ln l="1430" t="12077" r="3744" b="12235" baseLine="12230" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="12130" r="1690" b="12235">are</wd>

<space/>

<wd l="1757" t="12077" r="2592" b="12235">evaluated</wd>

<space/>

<wd l="2654" t="12077" r="2818" b="12230">in</wd>

<space/>

<wd l="2885" t="12077" r="3533" b="12235">Section</wd>

<space/>

<wd l="3590" t="12082" r="3744" b="12235">4.</wd>

</ln>

</para>

<para l="1421" t="12331" r="5794" b="15062" alignment="justified" li="72" ri="72" fli="288" lsp="exactly" lspExact="252" language="en">

<ln l="1656" t="12331" r="5784" b="12533" baseLine="12485" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="12341" r="1886" b="12490">To</wd>

<space/>

<wd l="1973" t="12331" r="2376" b="12490">train</wd>

<space/>

<wd l="2448" t="12331" r="2712" b="12490">the</wd>

<space/>

<wd l="2794" t="12331" r="3629" b="12523">classifier,</wd>

<space/>

<wd l="3715" t="12384" r="3965" b="12490">we</wd>

<space/>

<wd l="4046" t="12355" r="4781" b="12533">generate</wd>

<space/>

<wd l="4867" t="12331" r="5784" b="12490">candidates</wd>

<space/>

</ln>

<ln l="1430" t="12586" r="5794" b="12787" baseLine="12739" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="12586" r="1685" b="12744">for</wd>

<space/>

<wd l="1742" t="12586" r="2141" b="12744">each</wd>

<space/>

<wd l="2198" t="12586" r="2688" b="12744">token</wd>

<space/>

<wd l="2746" t="12586" r="2914" b="12739">in</wd>

<space/>

<wd l="2971" t="12586" r="3648" b="12787">training</wd>

<space/>

<wd l="3715" t="12586" r="4075" b="12744">data</wd>

<space/>

<wd l="4138" t="12586" r="4450" b="12744">and</wd>

<space/>

<wd l="4512" t="12586" r="4930" b="12744">label</wd>

<space/>

<wd l="4997" t="12586" r="5395" b="12744">each</wd>

<space/>

<wd l="5443" t="12586" r="5794" b="12787">pair</wd>

<space/>

</ln>

<ln l="1430" t="12835" r="5784" b="13037" baseLine="12989" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="12835" r="2290" b="13037">according</wd>

<space/>

<wd l="2376" t="12859" r="2534" b="12994">to</wd>

<space/>

<wd l="2626" t="12835" r="3221" b="12994">human</wd>

<space/>

<wd l="3312" t="12835" r="4277" b="12994">annotation.</wd>

<space/>

<wd l="4378" t="12835" r="4541" b="12989">If</wd>

<space/>

<wd l="4598" t="12835" r="4862" b="12994">the</wd>

<space/>

<wd l="4954" t="12835" r="5784" b="12994">candidate</wd>

<space/>

</ln>

<ln l="1430" t="13090" r="5784" b="13248" baseLine="13243" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="13090" r="1565" b="13248">is</wd>

<space/>

<wd l="1651" t="13090" r="1910" b="13248">the</wd>

<space/>

<wd l="2002" t="13114" r="2606" b="13248">correct</wd>

<space/>

<wd l="2688" t="13090" r="3518" b="13248">canonical</wd>

<space/>

<wd l="3605" t="13090" r="4027" b="13248">form</wd>

<space/>

<wd l="4109" t="13090" r="4310" b="13248">of</wd>

<space/>

<wd l="4363" t="13090" r="4627" b="13248">the</wd>

<space/>

<wd l="4709" t="13090" r="5198" b="13248">token</wd>

<space/>

<wd l="5280" t="13090" r="5448" b="13243">in</wd>

<space/>

<wd l="5525" t="13090" r="5784" b="13248">the</wd>

<space/>

</ln>

<ln l="1426" t="13344" r="5784" b="13546" baseLine="13498" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="13368" r="1944" b="13536">tweet,</wd>

<space/>

<wd l="2040" t="13344" r="2419" b="13502">then</wd>

<space/>

<wd l="2506" t="13344" r="2765" b="13502">the</wd>

<space/>

<wd l="2851" t="13344" r="3202" b="13546">pair</wd>

<space/>

<wd l="3288" t="13344" r="3422" b="13502">is</wd>

<space/>

<wd l="3518" t="13344" r="4152" b="13502">labeled</wd>

<space/>

<wd l="4238" t="13397" r="4406" b="13502">as</wd>

<space/>

<wd l="4502" t="13344" r="4920" b="13502">class</wd>

<space/>

<wd l="5035" t="13349" r="5170" b="13536">1;</wd>

<space/>

<wd l="5270" t="13344" r="5784" b="13502">other-</wd>

</ln>

<ln l="1426" t="13594" r="5784" b="13795" baseLine="13747" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="13594" r="1824" b="13752">wise</wd>

<space/>

<wd l="1886" t="13594" r="2146" b="13752">the</wd>

<space/>

<wd l="2203" t="13594" r="2554" b="13795">pair</wd>

<space/>

<wd l="2611" t="13594" r="2746" b="13752">is</wd>

<space/>

<wd l="2813" t="13594" r="3446" b="13752">labeled</wd>

<space/>

<wd l="3504" t="13646" r="3672" b="13752">as</wd>

<space/>

<wd l="3744" t="13594" r="4157" b="13752">class</wd>

<space/>

<wd l="4224" t="13598" r="4373" b="13752">0.</wd>

<space/>

<wd l="4440" t="13603" r="5093" b="13752">Feature</wd>

<space/>

<wd l="5155" t="13618" r="5784" b="13752">vectors</wd>

<space/>

</ln>

<ln l="1426" t="13848" r="5794" b="14006" baseLine="14002" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="13848" r="1819" b="14006">with</wd>

<space/>

<wd l="1906" t="13848" r="2587" b="14006">features</wd>

<space/>

<wd l="2683" t="13848" r="3523" b="14006">described</wd>

<space/>

<wd l="3610" t="13848" r="4123" b="14006">above</wd>

<space/>

<wd l="4214" t="13901" r="4474" b="14006">are</wd>

<space/>

<wd l="4570" t="13848" r="5453" b="14006">calculated</wd>

<space/>

<wd l="5539" t="13848" r="5794" b="14006">for</wd>

<space/>

</ln>

<ln l="1430" t="14102" r="5794" b="14304" baseLine="14256" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="14102" r="1829" b="14261">each</wd>

<space/>

<wd l="1896" t="14102" r="2285" b="14304">pair.</wd>

<space/>

<wd l="2371" t="14102" r="2818" b="14261">Then</wd>

<space/>

<wd l="2890" t="14155" r="2981" b="14261">a</wd>

<space/>

<wd l="3053" t="14102" r="3725" b="14261">random</wd>

<space/>

<wd l="3802" t="14102" r="4296" b="14261">forest</wd>

<space/>

<wd l="4358" t="14102" r="4930" b="14304">binary</wd>

<space/>

<wd l="5002" t="14102" r="5794" b="14261">classifier</wd>

<space/>

</ln>

<ln l="1430" t="14352" r="5784" b="14544" baseLine="14506" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="14352" r="1565" b="14510">is</wd>

<space/>

<wd l="1694" t="14352" r="2381" b="14510">learned.</wd>

<space/>

<wd l="2506" t="14352" r="3034" b="14510">When</wd>

<space/>

<wd l="3149" t="14352" r="3408" b="14510">the</wd>

<space/>

<wd l="3538" t="14352" r="4330" b="14510">classifier</wd>

<space/>

<wd l="4450" t="14352" r="4579" b="14510">is</wd>

<space/>

<wd l="4709" t="14352" r="5395" b="14544">learned,</wd>

<space/>

<wd l="5525" t="14352" r="5784" b="14510">the</wd>

<space/>

</ln>

<ln l="1430" t="14606" r="5784" b="14808" baseLine="14760" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="14606" r="1843" b="14765">class</wd>

<space/>

<wd l="1944" t="14606" r="2506" b="14808">(label)</wd>

<space/>

<wd l="2597" t="14606" r="3274" b="14808">weights</wd>

<space/>

<wd l="3370" t="14659" r="3629" b="14765">are</wd>

<space/>

<wd l="3720" t="14606" r="4450" b="14808">adjusted</wd>

<space/>

<wd l="4541" t="14606" r="5342" b="14808">inversely</wd>

<space/>

<wd l="5419" t="14659" r="5784" b="14808">pro-</wd>

</ln>

<ln l="1421" t="14861" r="5784" b="15062" baseLine="15014" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="14861" r="2218" b="15062">portional</wd>

<space/>

<wd l="2285" t="14885" r="2448" b="15019">to</wd>

<space/>

<wd l="2525" t="14861" r="2942" b="15019">class</wd>

<space/>

<wd l="3019" t="14861" r="4022" b="15062">frequencies</wd>

<space/>

<wd l="4099" t="14861" r="4267" b="15014">in</wd>

<space/>

<wd l="4330" t="14861" r="4594" b="15019">the</wd>

<space/>

<wd l="4670" t="14861" r="5030" b="15019">data</wd>

<space/>

<wd l="5093" t="14861" r="5784" b="15019">because</wd>

<space/>

</ln>

</para>

</column>

<column l="6032" t="1430" r="10626" b="15439">

<para l="6134" t="1474" r="10493" b="1886" alignment="justified" li="72" ri="72" lsp="exactly" lspExact="254" language="en">

<ln l="6134" t="1474" r="10493" b="1675" baseLine="1627" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="1474" r="6398" b="1632">the</wd>

<space/>

<wd l="6470" t="1474" r="6830" b="1632">data</wd>

<space/>

<wd l="6902" t="1474" r="7032" b="1632">is</wd>

<space/>

<wd l="7109" t="1474" r="8122" b="1632">imbalanced</wd>

<space/>

<wd l="8189" t="1474" r="8501" b="1632">and</wd>

<space/>

<wd l="8563" t="1474" r="9307" b="1675">majority</wd>

<space/>

<wd l="9374" t="1474" r="9576" b="1632">of</wd>

<space/>

<wd l="9619" t="1474" r="9878" b="1632">the</wd>

<space/>

<wd l="9955" t="1474" r="10493" b="1632">obser-</wd>

</ln>

<ln l="6134" t="1728" r="8011" b="1886" baseLine="1882" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="1728" r="6763" b="1886">vations</wd>

<space/>

<wd l="6830" t="1781" r="7090" b="1886">are</wd>

<space/>

<wd l="7152" t="1728" r="7320" b="1882">in</wd>

<space/>

<wd l="7382" t="1728" r="7795" b="1886">class</wd>

<space/>

<wd l="7862" t="1733" r="8011" b="1886">0.</wd>

</ln>

</para>

<para l="6134" t="2194" r="8702" b="2410" alignment="left" li="72" ri="72" spaceBefore="208" lsp="exactly" lspExact="274" language="en">

<ln l="6134" t="2194" r="8702" b="2410" baseLine="2357" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="11">

<wd l="6134" t="2194" r="6245" b="2362">3</wd>

<space/>

<wd l="6566" t="2194" r="7598" b="2362">Resources</wd>

<space/>

<wd l="7670" t="2194" r="8702" b="2410">Employed</wd>

</ln>

</para>

<para l="6134" t="2616" r="10502" b="4800" alignment="justified" li="72" ri="72" spaceBefore="145" lsp="exactly" lspExact="254" language="en">

<ln l="6134" t="2616" r="10493" b="2818" baseLine="2770" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6134" t="2626" r="6432" b="2774">We</wd>

<space/>

<wd l="6528" t="2616" r="7675" b="2818">implemented</wd>

<space/>

<wd l="7757" t="2640" r="8078" b="2774">two</wd>

<space/>

<wd l="8170" t="2616" r="8736" b="2774">modes</wd>

<space/>

<wd l="8832" t="2616" r="9091" b="2774">for</wd>

<space/>

<wd l="9173" t="2669" r="9466" b="2774">our</wd>

<space/>

<wd l="9547" t="2616" r="10493" b="2774">normaliza-</wd>

</ln>

<ln l="6134" t="2870" r="10493" b="3072" baseLine="3024" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6134" t="2870" r="6480" b="3029">tion</wd>

<space/>

<wd l="6576" t="2894" r="7219" b="3072">system:</wd>

<space/>

<wd l="7330" t="2923" r="7421" b="3029">a</wd>

<space/>

<wd l="7517" t="2870" r="8525" b="3029">constrained</wd>

<space/>

<wd l="8611" t="2870" r="9096" b="3029">mode</wd>

<space/>

<wd l="9192" t="2870" r="9509" b="3029">and</wd>

<space/>

<wd l="9600" t="2923" r="9797" b="3029">an</wd>

<space/>

<wd l="9888" t="2923" r="10493" b="3029">uncon-</wd>

</ln>

<ln l="6144" t="3082" r="10502" b="3326" baseLine="3273">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="6144" t="3125" r="6830" b="3283">strained</wd>

<space/>

</run>

<wd l="6907" t="3082" r="7512" b="3283"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">mode.</run>

<run underlined="none" subsuperscript="superscript" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">3</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><space/>

<wd l="7584" t="3125" r="7915" b="3283">The</wd>

<space/>

<wd l="8002" t="3125" r="9010" b="3283">constrained</wd>

<space/>

<wd l="9086" t="3125" r="9571" b="3283">mode</wd>

<space/>

<wd l="9653" t="3178" r="10022" b="3283">uses</wd>

<space/>

<wd l="10114" t="3125" r="10502" b="3326">only</wd>

<space/>

</run>

</ln>

<ln l="6134" t="3374" r="10502" b="3576" baseLine="3528">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="6134" t="3374" r="6398" b="3533">the</wd>

<space/>

<wd l="6499" t="3374" r="7181" b="3576">training</wd>

<space/>

<wd l="7286" t="3374" r="7646" b="3533">data</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="7752" t="3374" r="10075" b="3576">train_data_20150430.json</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="10186" t="3374" r="10502" b="3533">and</wd>

<space/>

</run>

</ln>

<ln l="6134" t="3629" r="10488" b="3830" baseLine="3782">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="6134" t="3629" r="6398" b="3787">the</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="6475" t="3629" r="6773" b="3787">ark</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="6835" t="3629" r="7411" b="3787">twitter</wd>

<space/>

<wd l="7478" t="3634" r="7867" b="3787">POS</wd>

<space/>

<wd l="7949" t="3653" r="8501" b="3830">tagger</wd>

<space/>

<wd l="8573" t="3629" r="9298" b="3830">(Gimpel</wd>

<space/>

<wd l="9374" t="3653" r="9528" b="3787">et</wd>

<space/>

<wd l="9600" t="3629" r="9850" b="3821">al.,</wd>

<space/>

<wd l="9931" t="3629" r="10488" b="3830">2011).</wd>

<space/>

</run>

</ln>

<ln l="6139" t="3883" r="10493" b="4085" baseLine="4037" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6139" t="3883" r="6470" b="4042">The</wd>

<space/>

<wd l="6542" t="3883" r="7776" b="4042">unconstrained</wd>

<space/>

<wd l="7838" t="3883" r="8323" b="4042">mode</wd>

<space/>

<wd l="8390" t="3936" r="8760" b="4042">uses</wd>

<space/>

<wd l="8832" t="3883" r="9096" b="4042">the</wd>

<space/>

<wd l="9173" t="3883" r="10003" b="4042">canonical</wd>

<space/>

<wd l="10070" t="3893" r="10493" b="4085">Eng-</wd>

</ln>

<ln l="6139" t="4138" r="10502" b="4339" baseLine="4291">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="6139" t="4138" r="6451" b="4296">lish</wd>

<space/>

<wd l="6614" t="4138" r="7258" b="4296">lexicon</wd>

<space/>

<wd l="7416" t="4138" r="8304" b="4339">dictionary</wd>

<space/>

</run>

<wd l="8453" t="4138" r="10162" b="4330"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">scowl.american.70</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><space/>

<wd l="10334" t="4138" r="10502" b="4291">in</wd>

<space/>

</run>

</ln>

<ln l="6139" t="4387" r="10502" b="4589" baseLine="4541" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6139" t="4387" r="6854" b="4546">addition</wd>

<space/>

<wd l="6931" t="4411" r="7090" b="4546">to</wd>

<space/>

<wd l="7181" t="4387" r="7392" b="4546">all</wd>

<space/>

<wd l="7469" t="4440" r="8290" b="4546">resources</wd>

<space/>

<wd l="8371" t="4387" r="8779" b="4546">used</wd>

<space/>

<wd l="8846" t="4387" r="9072" b="4589">by</wd>

<space/>

<wd l="9144" t="4387" r="9408" b="4546">the</wd>

<space/>

<wd l="9490" t="4387" r="10502" b="4546">constrained</wd>

<space/>

</ln>

<ln l="6134" t="4642" r="6667" b="4800" baseLine="4795" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6134" t="4642" r="6667" b="4800">mode.</wd>

</ln>

</para>

<para l="6139" t="5102" r="9005" b="5323" alignment="left" li="72" ri="72" spaceBefore="208" lsp="exactly" lspExact="274" language="en">

<ln l="6139" t="5102" r="9005" b="5323" baseLine="5270" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="9">

<wd l="6139" t="5107" r="6250" b="5270">4</wd>

<space/>

<wd l="6576" t="5107" r="7373" b="5323">Settings</wd>

<space/>

<wd l="7450" t="5107" r="7824" b="5275">and</wd>

<space/>

<wd l="7891" t="5107" r="9005" b="5275">Evaluation</wd>

</ln>

</para>

<para l="6134" t="5530" r="10502" b="6446" alignment="justified" li="72" ri="72" spaceBefore="149" lsp="exactly" lspExact="254" language="en">

<ln l="6134" t="5530" r="10502" b="5688" baseLine="5683" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="5539" r="6446" b="5688">For</wd>

<space/>

<wd l="6605" t="5530" r="6998" b="5688">both</wd>

<space/>

<wd l="7166" t="5530" r="7430" b="5688">the</wd>

<space/>

<wd l="7608" t="5530" r="8616" b="5688">constrained</wd>

<space/>

<wd l="8789" t="5530" r="9101" b="5688">and</wd>

<space/>

<wd l="9269" t="5530" r="10502" b="5688">unconstrained</wd>

<space/>

</ln>

<ln l="6134" t="5784" r="10493" b="5986" baseLine="5938" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="5784" r="6754" b="5976">modes,</wd>

<space/>

<wd l="6835" t="5837" r="7090" b="5942">we</wd>

<space/>

<wd l="7166" t="5837" r="7454" b="5942">use</wd>

<space/>

<wd l="7536" t="5784" r="7925" b="5986">only</wd>

<space/>

<wd l="7992" t="5784" r="8698" b="5986">bigrams</wd>

<space/>

<wd l="8784" t="5784" r="9096" b="5942">and</wd>

<space/>

<wd l="9192" t="5784" r="10493" b="5986">1-skip-bigrams</wd>

<space/>

</ln>

<ln l="6139" t="6038" r="10502" b="6240" baseLine="6192" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="6091" r="6312" b="6197">as</wd>

<space/>

<wd l="6437" t="6038" r="7267" b="6240">similarity</wd>

<space/>

<wd l="7382" t="6038" r="8117" b="6197">features.</wd>

<space/>

<wd l="8237" t="6038" r="8568" b="6197">The</wd>

<space/>

<wd l="8688" t="6038" r="9653" b="6197">differences</wd>

<space/>

<wd l="9763" t="6038" r="10502" b="6197">between</wd>

<space/>

</ln>

<ln l="6134" t="6288" r="8909" b="6446" baseLine="6442" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="6288" r="6398" b="6446">the</wd>

<space/>

<wd l="6461" t="6312" r="6778" b="6446">two</wd>

<space/>

<wd l="6845" t="6288" r="7411" b="6446">modes</wd>

<space/>

<wd l="7478" t="6341" r="7738" b="6446">are</wd>

<space/>

<wd l="7800" t="6288" r="8275" b="6446">listed</wd>

<space/>

<wd l="8323" t="6288" r="8909" b="6446">below.</wd>

</ln>

</para>

<para l="6360" t="6542" r="8650" b="6701" alignment="left" li="360" ri="72" lsp="exactly" lspExact="254" language="en">

<ln l="6360" t="6542" r="8650" b="6701" baseLine="6696" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6360" t="6552" r="6672" b="6701">For</wd>

<space/>

<wd l="6725" t="6542" r="6984" b="6701">the</wd>

<space/>

<wd l="7051" t="6542" r="8059" b="6701">constrained</wd>

<space/>

<wd l="8117" t="6542" r="8650" b="6701">mode:</wd>

</ln>

</para>

<para l="6437" t="6811" r="10498" b="7267" alignment="justified" li="648" ri="72" spaceBefore="15" fli="-288" lsp="exactly" lspExact="254" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="6437" t="6811" r="10498" b="7013" baseLine="6965" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6437" t="6821" r="6720" b="6965">•</wd>

<tab position="6518"/>

<wd l="6720" t="6821" r="6850" b="6970">It</wd>

<space/>

<wd l="6931" t="6864" r="7301" b="6970">uses</wd>

<space/>

<wd l="7387" t="6811" r="8462" b="7013">best-scoring</wd>

<space/>

<wd l="8554" t="6811" r="9389" b="6970">canonical</wd>

<space/>

<wd l="9480" t="6811" r="9979" b="6970">forms</wd>

<space/>

<wd l="10075" t="6811" r="10498" b="6970">from</wd>

<space/>

</ln>

<ln l="6715" t="7066" r="9691" b="7267" baseLine="7219" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6715" t="7066" r="6974" b="7224">the</wd>

<space/>

<wd l="7046" t="7066" r="7882" b="7267">similarity</wd>

<space/>

<wd l="7939" t="7066" r="8419" b="7224">index</wd>

<space/>

<wd l="8486" t="7118" r="8654" b="7224">as</wd>

<space/>

<wd l="8722" t="7066" r="9691" b="7224">candidates.</wd>

</ln>

</para>

<para l="6437" t="7450" r="10493" b="8414" alignment="justified" li="648" ri="72" spaceBefore="127" fli="-288" lsp="exactly" lspExact="254" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="6437" t="7450" r="10493" b="7651" baseLine="7603" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6437" t="7459" r="6720" b="7603">•</wd>

<tab position="6518"/>

<wd l="6720" t="7459" r="6850" b="7608">It</wd>

<space/>

<wd l="6922" t="7502" r="7291" b="7608">uses</wd>

<space/>

<wd l="7387" t="7450" r="8222" b="7651">similarity</wd>

<space/>

<wd l="8299" t="7450" r="8779" b="7608">index</wd>

<space/>

<wd l="8866" t="7450" r="9120" b="7608">for</wd>

<space/>

<wd l="9197" t="7450" r="10027" b="7608">candidate</wd>

<space/>

<wd l="10114" t="7502" r="10493" b="7651">gen-</wd>

</ln>

<ln l="6720" t="7704" r="10493" b="7906" baseLine="7858" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6720" t="7704" r="7325" b="7862">eration</wd>

<space/>

<wd l="7426" t="7704" r="7810" b="7906">only</wd>

<space/>

<wd l="7906" t="7704" r="8386" b="7862">when</wd>

<space/>

<wd l="8477" t="7704" r="8741" b="7862">the</wd>

<space/>

<wd l="8842" t="7704" r="9331" b="7862">token</wd>

<space/>

<wd l="9432" t="7704" r="10152" b="7862">contains</wd>

<space/>

<wd l="10258" t="7757" r="10493" b="7862">re-</wd>

</ln>

<ln l="6710" t="7958" r="10493" b="8160" baseLine="8112" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6710" t="7958" r="7368" b="8160">petitive</wd>

<space/>

<wd l="7483" t="7958" r="8362" b="8117">characters</wd>

<space/>

<wd l="8477" t="7958" r="8990" b="8160">(same</wd>

<space/>

<wd l="9106" t="7958" r="9912" b="8117">character</wd>

<space/>

<wd l="10018" t="8011" r="10493" b="8117">occu-</wd>

</ln>

<ln l="6710" t="8112" r="9350" b="8414" baseLine="8359">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6710" t="8213" r="7210" b="8414">pying</wd>

<space/>

<wd l="7272" t="8213" r="8304" b="8371">consecutive</wd>

<space/>

</run>

<wd l="8357" t="8170" r="9350" b="8414"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">positions).</run>

<run underlined="none" subsuperscript="superscript" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">4</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

<para l="6437" t="8597" r="10498" b="9264" alignment="justified" li="648" ri="72" spaceBefore="135" fli="-288" lsp="exactly" lspExact="254" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="6437" t="8597" r="10498" b="8798" baseLine="8750" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6437" t="8606" r="6720" b="8750">•</wd>

<tab position="6518"/>

<wd l="6720" t="8606" r="6850" b="8755">It</wd>

<space/>

<wd l="6950" t="8597" r="7483" b="8755">builds</wd>

<space/>

<wd l="7603" t="8650" r="7694" b="8755">a</wd>

<space/>

<wd l="7810" t="8597" r="8645" b="8798">similarity</wd>

<space/>

<wd l="8755" t="8597" r="9235" b="8755">index</wd>

<space/>

<wd l="9341" t="8597" r="9850" b="8755">based</wd>

<space/>

<wd l="9960" t="8650" r="10176" b="8755">on</wd>

<space/>

<wd l="10286" t="8597" r="10498" b="8755">all</wd>

<space/>

</ln>

<ln l="6720" t="8851" r="10493" b="9053" baseLine="9005" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6720" t="8851" r="7550" b="9010">canonical</wd>

<space/>

<wd l="7627" t="8851" r="8126" b="9010">forms</wd>

<space/>

<wd l="8194" t="8875" r="8837" b="9053">present</wd>

<space/>

<wd l="8904" t="8851" r="9072" b="9005">in</wd>

<space/>

<wd l="9134" t="8851" r="9398" b="9010">the</wd>

<space/>

<wd l="9470" t="8851" r="10147" b="9053">training</wd>

<space/>

<wd l="10224" t="8851" r="10493" b="9010">da-</wd>

</ln>

<ln l="6715" t="9110" r="6917" b="9264" baseLine="9259" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6715" t="9130" r="6917" b="9264">ta.</wd>

</ln>

</para>

<para l="6437" t="9494" r="10498" b="10162" alignment="justified" li="648" ri="72" spaceBefore="136" fli="-288" lsp="exactly" lspExact="254" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="6437" t="9494" r="10493" b="9696" baseLine="9648" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="3">

<wd l="6437" t="9504" r="6715" b="9648">•</wd>

<tab position="6518"/>

<wd l="6715" t="9494" r="7656" b="9696">Dictionary</wd>

<space/>

<wd l="7723" t="9494" r="8035" b="9653">and</wd>

<space/>

<wd l="8102" t="9494" r="8702" b="9653">feature</wd>

<space/>

<wd l="8779" t="9494" r="9490" b="9696">learning</wd>

<space/>

<wd l="9562" t="9494" r="9874" b="9653">and</wd>

<space/>

<wd l="9941" t="9494" r="10493" b="9653">classi-</wd>

</ln>

<ln l="6720" t="9749" r="10498" b="9950" baseLine="9902" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="3">

<wd l="6720" t="9749" r="7022" b="9907">fier</wd>

<space/>

<wd l="7123" t="9749" r="7800" b="9950">training</wd>

<space/>

<wd l="7915" t="9802" r="8170" b="9907">are</wd>

<space/>

<wd l="8275" t="9749" r="8779" b="9907">based</wd>

<space/>

<wd l="8890" t="9802" r="9106" b="9907">on</wd>

<space/>

<wd l="9206" t="9749" r="9470" b="9907">the</wd>

<space/>

<wd l="9590" t="9802" r="10022" b="9907">same</wd>

<space/>

<wd l="10138" t="9749" r="10498" b="9907">data</wd>

<space/>

</ln>

<ln l="6725" t="10008" r="7003" b="10162" baseLine="10157" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="3">

<wd l="6725" t="10027" r="7003" b="10162">set.</wd>

</ln>

</para>

<para l="6422" t="10373" r="8981" b="10531" alignment="left" li="360" ri="72" spaceBefore="115" lsp="exactly" lspExact="254" language="en">

<ln l="6422" t="10373" r="8981" b="10531" baseLine="10526" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6422" t="10382" r="6734" b="10531">For</wd>

<space/>

<wd l="6787" t="10373" r="7046" b="10531">the</wd>

<space/>

<wd l="7109" t="10373" r="8390" b="10531">Unconstrained</wd>

<space/>

<wd l="8448" t="10373" r="8981" b="10531">mode:</wd>

</ln>

</para>

<para l="6437" t="10762" r="10493" b="11218" alignment="justified" li="648" ri="72" spaceBefore="136" fli="-288" lsp="exactly" lspExact="254" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="6437" t="10762" r="10493" b="10963" baseLine="10915">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6437" t="10771" r="6720" b="10915">•</wd>

<tab position="6518"/>

<wd l="6720" t="10771" r="6850" b="10920">It</wd>

<space/>

<wd l="6922" t="10814" r="7291" b="10920">uses</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7382" t="10766" r="7834" b="10963">top-3</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7910" t="10762" r="8990" b="10963">best-scoring</wd>

<space/>

<wd l="9072" t="10762" r="9907" b="10920">canonical</wd>

<space/>

<wd l="9994" t="10762" r="10493" b="10920">forms</wd>

<space/>

</run>

</ln>

<ln l="6720" t="11016" r="10171" b="11218" baseLine="11170" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6720" t="11016" r="7138" b="11174">from</wd>

<space/>

<wd l="7195" t="11016" r="7459" b="11174">the</wd>

<space/>

<wd l="7531" t="11016" r="8362" b="11218">similarity</wd>

<space/>

<wd l="8424" t="11016" r="8904" b="11174">index</wd>

<space/>

<wd l="8966" t="11069" r="9134" b="11174">as</wd>

<space/>

<wd l="9206" t="11016" r="10171" b="11174">candidates.</wd>

</ln>

</para>

<para l="6437" t="11405" r="10498" b="11861" alignment="justified" li="648" ri="72" spaceBefore="135" fli="-288" lsp="exactly" lspExact="253" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="6437" t="11405" r="10498" b="11606" baseLine="11558" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6437" t="11414" r="6720" b="11558">•</wd>

<tab position="6518"/>

<wd l="6720" t="11414" r="6850" b="11563">It</wd>

<space/>

<wd l="6950" t="11405" r="7483" b="11563">builds</wd>

<space/>

<wd l="7603" t="11458" r="7694" b="11563">a</wd>

<space/>

<wd l="7810" t="11405" r="8645" b="11606">similarity</wd>

<space/>

<wd l="8755" t="11405" r="9235" b="11563">index</wd>

<space/>

<wd l="9341" t="11405" r="9850" b="11563">based</wd>

<space/>

<wd l="9960" t="11458" r="10176" b="11563">on</wd>

<space/>

<wd l="10286" t="11405" r="10498" b="11563">all</wd>

<space/>

</ln>

<ln l="6720" t="11659" r="10498" b="11861" baseLine="11813" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6720" t="11659" r="7550" b="11818">canonical</wd>

<space/>

<wd l="7618" t="11659" r="8117" b="11818">forms</wd>

<space/>

<wd l="8189" t="11659" r="8357" b="11813">in</wd>

<space/>

<wd l="8414" t="11659" r="8674" b="11818">the</wd>

<space/>

<wd l="8741" t="11659" r="9418" b="11861">training</wd>

<space/>

<wd l="9485" t="11659" r="9845" b="11818">data</wd>

<space/>

<wd l="9907" t="11659" r="10224" b="11818">and</wd>

<space/>

<wd l="10286" t="11659" r="10498" b="11818">all</wd>

</ln>

</para>

<para l="6710" t="11909" r="10498" b="12322" alignment="left" li="648" ri="72" lsp="exactly" lspExact="253" language="en">

<tabs position="6710"/>

<tabs alignment="left" position="12322" leaderChar=" "/>

<tabs alignment="right" position="4464" leaderChar=" "/>

<ln l="6720" t="11909" r="10498" b="12110" baseLine="12062" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="6720" t="11909" r="7435" b="12067">lexicons</wd>

<tab position="7435"/>

<wd l="8026" t="11909" r="8194" b="12062">in</wd>

<tab position="8194"/>

<wd l="8765" t="11909" r="9029" b="12067">the</wd>

<tab position="9029"/>

<wd l="9614" t="11909" r="10498" b="12110">dictionary
</wd>

</ln>

<ln l="6710" t="12163" r="8419" b="12322" baseLine="12317" forcedEOF="true">

<wd l="6710" t="12163" r="8419" b="12322"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">scowl.american.70</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

<para l="6437" t="12552" r="10493" b="13008" alignment="justified" li="648" ri="72" spaceBefore="134" spaceAfter="217" fli="-288" lsp="exactly" lspExact="254" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="6437" t="12552" r="10493" b="12754" baseLine="12706" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6437" t="12562" r="6720" b="12706">•</wd>

<tab position="6518"/>

<wd l="6720" t="12562" r="6850" b="12710">It</wd>

<space/>

<wd l="6912" t="12552" r="7512" b="12754">always</wd>

<space/>

<wd l="7579" t="12605" r="7949" b="12710">uses</wd>

<space/>

<wd l="8021" t="12552" r="8285" b="12710">the</wd>

<space/>

<wd l="8362" t="12552" r="9192" b="12754">similarity</wd>

<space/>

<wd l="9259" t="12552" r="9739" b="12710">index</wd>

<space/>

<wd l="9811" t="12552" r="10066" b="12710">for</wd>

<space/>

<wd l="10128" t="12605" r="10493" b="12710">can-</wd>

</ln>

<ln l="6720" t="12806" r="8280" b="13008" baseLine="12960" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6720" t="12806" r="7243" b="12965">didate</wd>

<space/>

<wd l="7310" t="12806" r="8280" b="13008">generation.</wd>

</ln>

</para>

<rulerline l="6134" t="13248" r="9019" b="13248" type="single" width="14" color="000000"/>

<para l="6134" t="13368" r="10445" b="14189" alignment="left" li="72" ri="72" spaceBefore="131" lsp="exactly" lspExact="206" language="en">

<ln l="6139" t="13368" r="10210" b="13570" baseLine="13527">

<run underlined="none" subsuperscript="none" fontSize="600" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6139" t="13368" r="6187" b="13454">3</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6245" t="13406" r="6518" b="13536">The</wd>

<space/>

<wd l="6566" t="13406" r="7574" b="13536">unconstrained</wd>

<space/>

<wd l="7618" t="13406" r="8016" b="13536">mode</wd>

<space/>

<wd l="8064" t="13445" r="8342" b="13536">was</wd>

<space/>

<wd l="8395" t="13406" r="9130" b="13570">developed</wd>

<space/>

<wd l="9173" t="13406" r="9566" b="13536">when</wd>

<space/>

<wd l="9610" t="13445" r="9816" b="13536">we</wd>

<space/>

<wd l="9864" t="13445" r="10210" b="13536">were</wd>

<space/>

</run>

</ln>

<ln l="6134" t="13613" r="10306" b="13776" baseLine="13738" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="13613" r="6653" b="13776">writing</wd>

<space/>

<wd l="6701" t="13613" r="6960" b="13742">this</wd>

<space/>

<wd l="7003" t="13651" r="7445" b="13776">paper,</wd>

<space/>

<wd l="7502" t="13613" r="7829" b="13742">after</wd>

<space/>

<wd l="7872" t="13613" r="8088" b="13742">the</wd>

<space/>

<wd l="8141" t="13613" r="8899" b="13742">annotation</wd>

<space/>

<wd l="8947" t="13613" r="9154" b="13742">for</wd>

<space/>

<wd l="9197" t="13613" r="9413" b="13742">the</wd>

<space/>

<wd l="9461" t="13632" r="9715" b="13742">test</wd>

<space/>

<wd l="9763" t="13613" r="10056" b="13742">data</wd>

<space/>

<wd l="10109" t="13632" r="10306" b="13742">set</wd>

<space/>

</ln>

<ln l="6134" t="13819" r="10445" b="13982" baseLine="13944" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="13858" r="6413" b="13949">was</wd>

<space/>

<wd l="6461" t="13819" r="7104" b="13949">revealed.</wd>

<space/>

<wd l="7166" t="13819" r="7522" b="13982">Only</wd>

<space/>

<wd l="7565" t="13819" r="7781" b="13949">the</wd>

<space/>

<wd l="7834" t="13819" r="8659" b="13949">constrained</wd>

<space/>

<wd l="8707" t="13819" r="9101" b="13949">mode</wd>

<space/>

<wd l="9149" t="13858" r="9427" b="13949">was</wd>

<space/>

<wd l="9480" t="13819" r="10186" b="13949">submitted</wd>

<space/>

<wd l="10234" t="13819" r="10445" b="13949">for</wd>

<space/>

</ln>

<ln l="6134" t="14026" r="7291" b="14189" baseLine="14150" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="14026" r="6350" b="14155">the</wd>

<space/>

<wd l="6403" t="14026" r="7291" b="14189">competition.</wd>

</ln>

</para>

<para l="6134" t="14194" r="10450" b="15398" alignment="left" li="72" ri="72" spaceBefore="8" lsp="exactly" lspExact="205" language="en">

<ln l="6134" t="14194" r="10426" b="14395" baseLine="14354">

<run underlined="none" subsuperscript="none" fontSize="600" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6134" t="14194" r="6192" b="14275">4</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6245" t="14232" r="6557" b="14362">This</wd>

<space/>

<wd l="6610" t="14232" r="6720" b="14362">is</wd>

<space/>

<wd l="6763" t="14232" r="7334" b="14362">because</wd>

<space/>

<wd l="7387" t="14270" r="7464" b="14362">a</wd>

<space/>

<wd l="7517" t="14232" r="8198" b="14395">similarity</wd>

<space/>

<wd l="8251" t="14232" r="8645" b="14362">index</wd>

<space/>

<wd l="8683" t="14232" r="9101" b="14362">based</wd>

<space/>

<wd l="9149" t="14270" r="9326" b="14362">on</wd>

<space/>

<wd l="9374" t="14232" r="9902" b="14362">smaller</wd>

<space/>

<wd l="9946" t="14232" r="10426" b="14362">vocab-</wd>

</run>

</ln>

<ln l="6134" t="14438" r="10296" b="14602" baseLine="14563" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="14438" r="6504" b="14602">ulary</wd>

<space/>

<wd l="6557" t="14438" r="6917" b="14568">leads</wd>

<space/>

<wd l="6965" t="14458" r="7104" b="14568">to</wd>

<space/>

<wd l="7157" t="14438" r="7416" b="14568">less</wd>

<space/>

<wd l="7464" t="14438" r="8002" b="14568">reliable</wd>

<space/>

<wd l="8054" t="14438" r="8842" b="14568">candidates.</wd>

<space/>

<wd l="8899" t="14443" r="9154" b="14568">For</wd>

<space/>

<wd l="9202" t="14438" r="9845" b="14602">example,</wd>

<space/>

<wd l="9902" t="14438" r="10037" b="14563">in</wd>

<space/>

<wd l="10080" t="14438" r="10296" b="14568">the</wd>

<space/>

</ln>

<ln l="6139" t="14645" r="10123" b="14808" baseLine="14770" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="14645" r="6744" b="14808">example</wd>

<space/>

<wd l="6797" t="14645" r="7262" b="14774">shown</wd>

<space/>

<wd l="7310" t="14645" r="7445" b="14770">in</wd>

<space/>

<wd l="7488" t="14645" r="7958" b="14808">Figure</wd>

<space/>

<wd l="8026" t="14650" r="8131" b="14803">1,</wd>

<space/>

<wd l="8184" t="14645" r="8400" b="14774">the</wd>

<space/>

<wd l="8453" t="14645" r="9139" b="14808">similarity</wd>

<space/>

<wd l="9187" t="14645" r="9586" b="14774">index</wd>

<space/>

<wd l="9629" t="14664" r="10123" b="14774">returns</wd>

<space/>

</ln>

<ln l="6139" t="14856" r="10450" b="14986" baseLine="14981" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="14856" r="6514" b="14986">“car”</wd>

<space/>

<wd l="6566" t="14894" r="6706" b="14986">as</wd>

<space/>

<wd l="6758" t="14894" r="6835" b="14986">a</wd>

<space/>

<wd l="6883" t="14856" r="7565" b="14986">candidate</wd>

<space/>

<wd l="7618" t="14856" r="7786" b="14986">of</wd>

<space/>

<wd l="7814" t="14856" r="8174" b="14986">“cat”</wd>

<space/>

<wd l="8218" t="14856" r="8789" b="14986">because</wd>

<space/>

<wd l="8842" t="14856" r="9211" b="14986">“car”</wd>

<space/>

<wd l="9264" t="14856" r="9379" b="14986">is</wd>

<space/>

<wd l="9427" t="14856" r="9643" b="14986">the</wd>

<space/>

<wd l="9691" t="14875" r="10046" b="14986">most</wd>

<space/>

<wd l="10090" t="14856" r="10450" b="14986">simi-</wd>

</ln>

<ln l="6139" t="15062" r="10301" b="15226" baseLine="15187" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="15062" r="6326" b="15192">lar</wd>

<space/>

<wd l="6374" t="15062" r="7056" b="15192">canonical</wd>

<space/>

<wd l="7109" t="15062" r="7454" b="15192">form</wd>

<space/>

<wd l="7502" t="15062" r="7642" b="15187">in</wd>

<space/>

<wd l="7685" t="15062" r="8242" b="15226">training</wd>

<space/>

<wd l="8294" t="15062" r="8621" b="15192">data.</wd>

<space/>

<wd l="8678" t="15067" r="8832" b="15187">In</wd>

<space/>

<wd l="8880" t="15101" r="8957" b="15192">a</wd>

<space/>

<wd l="9005" t="15062" r="9422" b="15226">larger</wd>

<space/>

<wd l="9466" t="15062" r="10301" b="15226">vocabulary,</wd>

<space/>

</ln>

<ln l="6139" t="15269" r="10018" b="15398" baseLine="15394" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="15269" r="6499" b="15398">“cat”</wd>

<space/>

<wd l="6552" t="15269" r="6931" b="15398">itself</wd>

<space/>

<wd l="6960" t="15269" r="7435" b="15398">should</wd>

<space/>

<wd l="7474" t="15269" r="7646" b="15398">be</wd>

<space/>

<wd l="7694" t="15269" r="7910" b="15398">the</wd>

<space/>

<wd l="7958" t="15288" r="8314" b="15398">most</wd>

<space/>

<wd l="8362" t="15269" r="8856" b="15398">similar</wd>

<space/>

<wd l="8904" t="15269" r="9586" b="15398">canonical</wd>

<space/>

<wd l="9638" t="15269" r="10018" b="15398">form.</wd>

</ln>

</para>

</column>

</section>

<dd l="5771" t="15736" r="6181" b="15977">

<para l="5804" t="15792" r="6148" b="15946" alignment="left" spaceBefore="4" lsp="exactly" lspExact="229" language="en">

<ln l="5870" t="15792" r="6082" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="27">

<wd l="5870" t="15792" r="6082" b="15946">90</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4313.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1426" marginTop="1429" marginRight="1282" marginBottom="1292" offsetX="8" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1426" t="1429" r="10627" b="15383">

<column l="1426" t="1429" r="5798" b="14975">

<para l="1728" t="1488" r="5789" b="2155" alignment="justified" li="576" spaceBefore="18" fli="-360" lsp="exactly" lspExact="253" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="1728" t="1488" r="5784" b="1690" baseLine="1642" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1728" t="1498" r="2006" b="1642">•</wd>

<tab position="1809"/>

<wd l="2006" t="1488" r="2947" b="1690">Dictionary</wd>

<space/>

<wd l="3014" t="1488" r="3326" b="1646">and</wd>

<space/>

<wd l="3394" t="1488" r="3994" b="1646">feature</wd>

<space/>

<wd l="4070" t="1488" r="4781" b="1690">learning</wd>

<space/>

<wd l="4853" t="1488" r="5165" b="1646">and</wd>

<space/>

<wd l="5237" t="1488" r="5784" b="1646">classi-</wd>

</ln>

<ln l="2011" t="1742" r="5789" b="1944" baseLine="1896" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="2011" t="1742" r="2314" b="1901">fier</wd>

<space/>

<wd l="2424" t="1742" r="3106" b="1944">training</wd>

<space/>

<wd l="3226" t="1795" r="3485" b="1901">are</wd>

<space/>

<wd l="3600" t="1742" r="4104" b="1901">based</wd>

<space/>

<wd l="4224" t="1795" r="4440" b="1901">on</wd>

<space/>

<wd l="4560" t="1742" r="5314" b="1901">different</wd>

<space/>

<wd l="5429" t="1742" r="5789" b="1901">data</wd>

<space/>

</ln>

<ln l="2016" t="2006" r="2376" b="2155" baseLine="2150" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="2016" t="2021" r="2376" b="2155">sets.</wd>

</ln>

</para>

<para l="1426" t="2371" r="5794" b="7378" alignment="justified" spaceBefore="120" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="1651" t="2371" r="5784" b="2573" baseLine="2525" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1651" t="2381" r="1963" b="2530">For</wd>

<space/>

<wd l="2016" t="2371" r="2280" b="2530">the</wd>

<space/>

<wd l="2347" t="2371" r="3355" b="2530">constrained</wd>

<space/>

<wd l="3413" t="2371" r="3946" b="2563">mode,</wd>

<space/>

<wd l="4018" t="2371" r="5030" b="2530">dictionaries</wd>

<space/>

<wd l="5102" t="2371" r="5784" b="2573">(includ-</wd>

</ln>

<ln l="1430" t="2626" r="5784" b="2827" baseLine="2779" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="2626" r="1704" b="2827">ing</wd>

<space/>

<wd l="1810" t="2626" r="2256" b="2784">static</wd>

<space/>

<wd l="2357" t="2626" r="3120" b="2827">mapping</wd>

<space/>

<wd l="3221" t="2626" r="4109" b="2827">dictionary</wd>

<space/>

<wd l="4205" t="2626" r="4517" b="2784">and</wd>

<space/>

<wd l="4622" t="2626" r="5453" b="2827">similarity</wd>

<space/>

<wd l="5549" t="2626" r="5784" b="2779">in-</wd>

</ln>

<ln l="1430" t="2875" r="5784" b="3077" baseLine="3029" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="2875" r="1862" b="3077">dex),</wd>

<space/>

<wd l="1939" t="2875" r="3096" b="3034">classification</wd>

<space/>

<wd l="3158" t="2875" r="3758" b="3034">feature</wd>

<space/>

<wd l="3830" t="2875" r="4790" b="3034">calculation</wd>

<space/>

<wd l="4858" t="2875" r="5170" b="3034">and</wd>

<space/>

<wd l="5232" t="2875" r="5784" b="3034">classi-</wd>

</ln>

<ln l="1430" t="3130" r="5794" b="3331" baseLine="3283" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="3130" r="1733" b="3288">fier</wd>

<space/>

<wd l="1829" t="3130" r="2506" b="3331">training</wd>

<space/>

<wd l="2611" t="3182" r="2870" b="3288">are</wd>

<space/>

<wd l="2966" t="3130" r="3475" b="3288">based</wd>

<space/>

<wd l="3576" t="3182" r="3792" b="3288">on</wd>

<space/>

<wd l="3883" t="3130" r="4147" b="3288">the</wd>

<space/>

<wd l="4258" t="3182" r="4694" b="3288">same</wd>

<space/>

<wd l="4805" t="3130" r="5165" b="3288">data</wd>

<space/>

<wd l="5270" t="3154" r="5549" b="3288">set.</wd>

<space/>

<wd l="5659" t="3139" r="5794" b="3288">It</wd>

<space/>

</ln>

<ln l="1430" t="3384" r="5794" b="3586" baseLine="3538" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="3437" r="1992" b="3542">causes</wd>

<space/>

<wd l="2117" t="3384" r="3034" b="3586">overfitting</wd>

<space/>

<wd l="3144" t="3384" r="3840" b="3542">because</wd>

<space/>

<wd l="3955" t="3384" r="4219" b="3542">the</wd>

<space/>

<wd l="4339" t="3384" r="5357" b="3542">dictionaries</wd>

<space/>

<wd l="5477" t="3384" r="5794" b="3542">and</wd>

<space/>

</ln>

<ln l="1426" t="3634" r="5789" b="3835" baseLine="3787" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="3634" r="1690" b="3792">the</wd>

<space/>

<wd l="1814" t="3658" r="2467" b="3835">support</wd>

<space/>

<wd l="2582" t="3634" r="2894" b="3792">and</wd>

<space/>

<wd l="3010" t="3634" r="3965" b="3792">confidence</wd>

<space/>

<wd l="4085" t="3634" r="4766" b="3792">features</wd>

<space/>

<wd l="4891" t="3634" r="5256" b="3792">leak</wd>

<space/>

<wd l="5371" t="3634" r="5789" b="3792">label</wd>

<space/>

</ln>

<ln l="1430" t="3888" r="5784" b="4080" baseLine="4042" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="3888" r="2510" b="4046">information.</wd>

<space/>

<wd l="2635" t="3898" r="3485" b="4080">However,</wd>

<space/>

<wd l="3619" t="3941" r="3912" b="4046">our</wd>

<space/>

<wd l="4032" t="3888" r="5429" b="4046">cross-validation</wd>

<space/>

<wd l="5549" t="3941" r="5784" b="4046">re-</wd>

</ln>

<ln l="1435" t="4142" r="5794" b="4344" baseLine="4296" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1435" t="4142" r="1819" b="4301">sults</wd>

<space/>

<wd l="1906" t="4142" r="2357" b="4301">show</wd>

<space/>

<wd l="2424" t="4142" r="2755" b="4301">that</wd>

<space/>

<wd l="2822" t="4142" r="3538" b="4344">learning</wd>

<space/>

<wd l="3610" t="4142" r="4670" b="4334">dictionaries,</wd>

<space/>

<wd l="4757" t="4166" r="5410" b="4344">support</wd>

<space/>

<wd l="5477" t="4142" r="5794" b="4301">and</wd>

<space/>

</ln>

<ln l="1430" t="4392" r="5784" b="4584" baseLine="4546" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="4392" r="2386" b="4550">confidence</wd>

<space/>

<wd l="2496" t="4392" r="3230" b="4584">features,</wd>

<space/>

<wd l="3346" t="4392" r="3658" b="4550">and</wd>

<space/>

<wd l="3763" t="4392" r="4555" b="4550">classifier</wd>

<space/>

<wd l="4656" t="4445" r="4872" b="4550">on</wd>

<space/>

<wd l="4973" t="4392" r="5232" b="4550">the</wd>

<space/>

<wd l="5347" t="4445" r="5784" b="4550">same</wd>

<space/>

</ln>

<ln l="1430" t="4646" r="5794" b="4848" baseLine="4800" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="4646" r="1790" b="4805">data</wd>

<space/>

<wd l="1862" t="4670" r="2098" b="4805">set</wd>

<space/>

<wd l="2165" t="4670" r="2981" b="4848">generates</wd>

<space/>

<wd l="3043" t="4646" r="3557" b="4805">better</wd>

<space/>

<wd l="3619" t="4646" r="4858" b="4848">generalization</wd>

<space/>

<wd l="4925" t="4699" r="5093" b="4805">as</wd>

<space/>

<wd l="5165" t="4646" r="5587" b="4805">well.</wd>

<space/>

<wd l="5664" t="4656" r="5794" b="4805">It</wd>

<space/>

</ln>

<ln l="1430" t="4901" r="5794" b="5102" baseLine="5054" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="4901" r="1867" b="5059">leads</wd>

<space/>

<wd l="1939" t="4925" r="2102" b="5059">to</wd>

<space/>

<wd l="2170" t="4901" r="2678" b="5059">better</wd>

<space/>

<wd l="2741" t="4906" r="2942" b="5054">F1</wd>

<space/>

<wd l="3043" t="4954" r="3490" b="5059">score</wd>

<space/>

<wd l="3562" t="4901" r="3941" b="5059">than</wd>

<space/>

<wd l="4013" t="4901" r="4718" b="5102">splitting</wd>

<space/>

<wd l="4786" t="4901" r="5050" b="5059">the</wd>

<space/>

<wd l="5122" t="4901" r="5482" b="5059">data</wd>

<space/>

<wd l="5558" t="4925" r="5794" b="5059">set</wd>

<space/>

</ln>

<ln l="1430" t="5155" r="5784" b="5357" baseLine="5309" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="5155" r="1762" b="5314">into</wd>

<space/>

<wd l="1848" t="5179" r="2165" b="5314">two</wd>

<space/>

<wd l="2246" t="5179" r="2674" b="5357">parts</wd>

<space/>

<wd l="2765" t="5155" r="3077" b="5314">and</wd>

<space/>

<wd l="3158" t="5155" r="3869" b="5357">learning</wd>

<space/>

<wd l="3955" t="5155" r="4968" b="5314">dictionaries</wd>

<space/>

<wd l="5059" t="5155" r="5371" b="5314">and</wd>

<space/>

<wd l="5453" t="5155" r="5784" b="5314">fea-</wd>

</ln>

<ln l="1426" t="5405" r="5794" b="5606" baseLine="5558" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="5429" r="1843" b="5563">tures</wd>

<space/>

<wd l="1949" t="5458" r="2165" b="5563">on</wd>

<space/>

<wd l="2261" t="5458" r="2568" b="5563">one</wd>

<space/>

<wd l="2664" t="5429" r="3010" b="5606">part</wd>

<space/>

<wd l="3106" t="5405" r="3418" b="5563">and</wd>

<space/>

<wd l="3514" t="5405" r="4229" b="5606">learning</wd>

<space/>

<wd l="4325" t="5405" r="4589" b="5563">the</wd>

<space/>

<wd l="4690" t="5405" r="5482" b="5563">classifier</wd>

<space/>

<wd l="5578" t="5458" r="5794" b="5563">on</wd>

<space/>

</ln>

<ln l="1426" t="5659" r="5784" b="5861" baseLine="5813" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="5659" r="1690" b="5818">the</wd>

<space/>

<wd l="1781" t="5659" r="2227" b="5818">other</wd>

<space/>

<wd l="2299" t="5683" r="2688" b="5861">part.</wd>

<space/>

<wd l="2784" t="5659" r="3163" b="5818">This</wd>

<space/>

<wd l="3254" t="5659" r="3389" b="5818">is</wd>

<space/>

<wd l="3470" t="5659" r="4166" b="5818">because</wd>

<space/>

<wd l="4253" t="5659" r="4848" b="5861">having</wd>

<space/>

<wd l="4934" t="5659" r="5362" b="5861">large</wd>

<space/>

<wd l="5453" t="5659" r="5784" b="5818">dic-</wd>

</ln>

<ln l="1426" t="5914" r="5794" b="6115" baseLine="6067" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="5914" r="2174" b="6072">tionaries</wd>

<space/>

<wd l="2275" t="5914" r="2410" b="6072">is</wd>

<space/>

<wd l="2506" t="5914" r="3096" b="6072">crucial</wd>

<space/>

<wd l="3192" t="5914" r="3446" b="6072">for</wd>

<space/>

<wd l="3533" t="5914" r="4368" b="6072">candidate</wd>

<space/>

<wd l="4464" t="5914" r="5386" b="6115">generation</wd>

<space/>

<wd l="5477" t="5914" r="5794" b="6072">and</wd>

<space/>

</ln>

<ln l="1426" t="6163" r="5779" b="6322" baseLine="6317" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="6163" r="1690" b="6322">the</wd>

<space/>

<wd l="1757" t="6187" r="2362" b="6322">correct</wd>

<space/>

<wd l="2424" t="6163" r="3254" b="6322">canonical</wd>

<space/>

<wd l="3322" t="6163" r="3739" b="6322">form</wd>

<space/>

<wd l="3806" t="6187" r="4387" b="6322">cannot</wd>

<space/>

<wd l="4435" t="6163" r="4642" b="6322">be</wd>

<space/>

<wd l="4709" t="6163" r="5218" b="6322">found</wd>

<space/>

<wd l="5280" t="6163" r="5434" b="6317">if</wd>

<space/>

<wd l="5472" t="6163" r="5587" b="6322">it</wd>

<space/>

<wd l="5650" t="6163" r="5779" b="6322">is</wd>

<space/>

</ln>

<ln l="1426" t="6418" r="5784" b="6619" baseLine="6571" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="6442" r="1709" b="6576">not</wd>

<space/>

<wd l="1771" t="6470" r="2362" b="6619">among</wd>

<space/>

<wd l="2429" t="6418" r="2688" b="6576">the</wd>

<space/>

<wd l="2765" t="6418" r="3730" b="6576">candidates.</wd>

<space/>

<wd l="3802" t="6418" r="4320" b="6619">Using</wd>

<space/>

<wd l="4392" t="6418" r="4603" b="6576">all</wd>

<space/>

<wd l="4666" t="6418" r="4930" b="6576">the</wd>

<space/>

<wd l="5002" t="6418" r="5784" b="6576">available</wd>

<space/>

</ln>

<ln l="1430" t="6672" r="5784" b="6874" baseLine="6826" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="6672" r="1790" b="6830">data</wd>

<space/>

<wd l="1886" t="6672" r="2506" b="6830">instead</wd>

<space/>

<wd l="2602" t="6672" r="2803" b="6830">of</wd>

<space/>

<wd l="2880" t="6672" r="3586" b="6874">splitting</wd>

<space/>

<wd l="3686" t="6672" r="3806" b="6830">it</wd>

<space/>

<wd l="3902" t="6672" r="4459" b="6830">allows</wd>

<space/>

<wd l="4560" t="6672" r="4824" b="6830">the</wd>

<space/>

<wd l="4930" t="6696" r="5530" b="6874">system</wd>

<space/>

<wd l="5621" t="6696" r="5784" b="6830">to</wd>

<space/>

</ln>

<ln l="1430" t="6922" r="5784" b="7123" baseLine="7075" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="6922" r="1867" b="7080">learn</wd>

<space/>

<wd l="1934" t="6922" r="2448" b="7123">larger</wd>

<space/>

<wd l="2515" t="6922" r="3528" b="7080">dictionaries</wd>

<space/>

<wd l="3605" t="6922" r="3917" b="7080">and</wd>

<space/>

<wd l="3984" t="6974" r="4430" b="7080">more</wd>

<space/>

<wd l="4502" t="6922" r="4882" b="7080">than</wd>

<space/>

<wd l="4944" t="6922" r="5496" b="7080">makes</wd>

<space/>

<wd l="5573" t="6974" r="5784" b="7123">up</wd>

<space/>

</ln>

<ln l="1430" t="7176" r="3821" b="7378" baseLine="7330" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="7176" r="1685" b="7334">for</wd>

<space/>

<wd l="1738" t="7176" r="2002" b="7334">the</wd>

<space/>

<wd l="2069" t="7176" r="2986" b="7378">overfitting</wd>

<space/>

<wd l="3043" t="7176" r="3821" b="7378">problem.</wd>

</ln>

</para>

<para l="1426" t="7430" r="5794" b="9614" alignment="justified" spaceBefore="3" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="1651" t="7430" r="5794" b="7622" baseLine="7584" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="7440" r="1963" b="7589">For</wd>

<space/>

<wd l="2050" t="7430" r="2309" b="7589">the</wd>

<space/>

<wd l="2405" t="7430" r="3638" b="7589">unconstrained</wd>

<space/>

<wd l="3725" t="7430" r="4258" b="7622">mode,</wd>

<space/>

<wd l="4363" t="7430" r="5376" b="7589">dictionaries</wd>

<space/>

<wd l="5477" t="7430" r="5794" b="7589">and</wd>

<space/>

</ln>

<ln l="1430" t="7685" r="5789" b="7848" baseLine="7838" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="7685" r="2112" b="7843">features</wd>

<space/>

<wd l="2194" t="7738" r="2448" b="7843">are</wd>

<space/>

<wd l="2525" t="7685" r="3168" b="7843">learned</wd>

<space/>

<wd l="3235" t="7738" r="3451" b="7843">on</wd>

<space/>

<wd l="3523" t="7690" r="3912" b="7848">67%</wd>

<space/>

<wd l="3989" t="7685" r="4190" b="7843">of</wd>

<space/>

<wd l="4234" t="7685" r="4498" b="7843">the</wd>

<space/>

<wd l="4570" t="7685" r="5352" b="7843">available</wd>

<space/>

<wd l="5429" t="7685" r="5789" b="7843">data</wd>

<space/>

</ln>

<ln l="1430" t="7934" r="5784" b="8098" baseLine="8088" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="7934" r="1742" b="8093">and</wd>

<space/>

<wd l="1814" t="7934" r="2074" b="8093">the</wd>

<space/>

<wd l="2155" t="7934" r="2947" b="8093">classifier</wd>

<space/>

<wd l="3014" t="7934" r="3149" b="8093">is</wd>

<space/>

<wd l="3230" t="7934" r="3874" b="8093">learned</wd>

<space/>

<wd l="3946" t="7987" r="4162" b="8093">on</wd>

<space/>

<wd l="4234" t="7939" r="4622" b="8098">33%</wd>

<space/>

<wd l="4709" t="7934" r="4906" b="8093">of</wd>

<space/>

<wd l="4954" t="7934" r="5218" b="8093">the</wd>

<space/>

<wd l="5294" t="7934" r="5784" b="8093">avail-</wd>

</ln>

<ln l="1430" t="8189" r="5789" b="8390" baseLine="8342" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="8189" r="1786" b="8347">able</wd>

<space/>

<wd l="1891" t="8189" r="2251" b="8347">data</wd>

<space/>

<wd l="2352" t="8189" r="3091" b="8390">(random</wd>

<space/>

<wd l="3197" t="8189" r="3682" b="8390">split).</wd>

<space/>

<wd l="3792" t="8189" r="4171" b="8347">This</wd>

<space/>

<wd l="4277" t="8189" r="4411" b="8347">is</wd>

<space/>

<wd l="4517" t="8189" r="5270" b="8347">different</wd>

<space/>

<wd l="5371" t="8189" r="5789" b="8347">from</wd>

<space/>

</ln>

<ln l="1430" t="8443" r="5794" b="8602" baseLine="8597" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="8443" r="2438" b="8602">constrained</wd>

<space/>

<wd l="2606" t="8443" r="3086" b="8602">mode</wd>

<space/>

<wd l="3254" t="8443" r="3950" b="8602">because</wd>

<space/>

<wd l="4123" t="8443" r="4387" b="8602">the</wd>

<space/>

<wd l="4555" t="8443" r="5794" b="8602">unconstrained</wd>

<space/>

</ln>

<ln l="1426" t="8693" r="5789" b="8894" baseLine="8846" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="8693" r="1910" b="8851">mode</wd>

<space/>

<wd l="2026" t="8693" r="2669" b="8894">already</wd>

<space/>

<wd l="2774" t="8693" r="3058" b="8851">has</wd>

<space/>

<wd l="3178" t="8746" r="3269" b="8851">a</wd>

<space/>

<wd l="3374" t="8746" r="3763" b="8894">very</wd>

<space/>

<wd l="3874" t="8693" r="4306" b="8894">large</wd>

<space/>

<wd l="4421" t="8693" r="5256" b="8851">canonical</wd>

<space/>

<wd l="5366" t="8693" r="5789" b="8851">form</wd>

<space/>

</ln>

<ln l="1430" t="8942" r="5784" b="9149" baseLine="9101">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1430" t="8947" r="2318" b="9149">dictionary</wd>

<space/>

<wd l="2405" t="8947" r="2573" b="9101">in</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2654" t="8947" r="4320" b="9106">scowl.american.70</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4397" t="8947" r="4709" b="9106">and</wd>

<space/>

<wd l="4790" t="8947" r="5054" b="9106">the</wd>

<space/>

<wd l="5150" t="9000" r="5784" b="9106">accura-</wd>

</run>

</ln>

<ln l="1430" t="9202" r="5784" b="9403" baseLine="9355" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="9254" r="1637" b="9403">cy</wd>

<space/>

<wd l="1747" t="9202" r="1949" b="9360">of</wd>

<space/>

<wd l="2045" t="9202" r="2813" b="9403">selecting</wd>

<space/>

<wd l="2923" t="9202" r="3187" b="9360">the</wd>

<space/>

<wd l="3307" t="9226" r="3917" b="9360">correct</wd>

<space/>

<wd l="4027" t="9202" r="4862" b="9360">canonical</wd>

<space/>

<wd l="4978" t="9202" r="5400" b="9360">form</wd>

<space/>

<wd l="5506" t="9202" r="5784" b="9360">be-</wd>

</ln>

<ln l="1430" t="9456" r="3326" b="9614" baseLine="9610" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="9509" r="1978" b="9614">comes</wd>

<space/>

<wd l="2045" t="9456" r="2304" b="9614">the</wd>

<space/>

<wd l="2362" t="9456" r="3326" b="9614">bottleneck.</wd>

</ln>

</para>

<para l="1426" t="9706" r="5789" b="11933" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="1651" t="9706" r="5789" b="9907" baseLine="9859" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1651" t="9715" r="1954" b="9864">We</wd>

<space/>

<wd l="2026" t="9706" r="2429" b="9864">used</wd>

<space/>

<wd l="2496" t="9706" r="2760" b="9864">the</wd>

<space/>

<wd l="2837" t="9706" r="3197" b="9864">data</wd>

<space/>

<wd l="3274" t="9730" r="3586" b="9864">sets</wd>

<space/>

<wd l="3658" t="9706" r="4445" b="9907">provided</wd>

<space/>

<wd l="4507" t="9706" r="4728" b="9907">by</wd>

<space/>

<wd l="4795" t="9706" r="5059" b="9864">the</wd>

<space/>

<wd l="5131" t="9715" r="5789" b="9864">WNUT</wd>

<space/>

</ln>

<ln l="1426" t="9960" r="5784" b="10162" baseLine="10114" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="9965" r="1853" b="10118">2015</wd>

<space/>

<wd l="2050" t="9960" r="2626" b="10118">lexical</wd>

<space/>

<wd l="2808" t="9960" r="4032" b="10118">normalization</wd>

<space/>

<wd l="4214" t="9960" r="5261" b="10162">competition</wd>

<space/>

<wd l="5443" t="9960" r="5784" b="10162">(de-</wd>

</ln>

<ln l="1435" t="10214" r="5779" b="10416" baseLine="10368" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1435" t="10214" r="2064" b="10373">scribed</wd>

<space/>

<wd l="2136" t="10214" r="2304" b="10368">in</wd>

<space/>

<wd l="2381" t="10214" r="3192" b="10416">(Baldwin</wd>

<space/>

<wd l="3269" t="10238" r="3427" b="10373">et</wd>

<space/>

<wd l="3499" t="10214" r="3754" b="10406">al.,</wd>

<space/>

<wd l="3835" t="10214" r="4416" b="10416">2015))</wd>

<space/>

<wd l="4498" t="10214" r="4752" b="10373">for</wd>

<space/>

<wd l="4824" t="10214" r="5779" b="10373">evaluation.</wd>

<space/>

</ln>

<ln l="1426" t="10464" r="5784" b="10666" baseLine="10618" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="10464" r="2045" b="10666">During</wd>

<space/>

<wd l="2117" t="10517" r="2410" b="10622">our</wd>

<space/>

<wd l="2477" t="10464" r="3610" b="10666">development</wd>

<space/>

<wd l="3677" t="10464" r="3878" b="10622">of</wd>

<space/>

<wd l="3917" t="10464" r="4181" b="10622">the</wd>

<space/>

<wd l="4262" t="10488" r="4992" b="10666">systems,</wd>

<space/>

<wd l="5074" t="10464" r="5458" b="10666">only</wd>

<space/>

<wd l="5520" t="10464" r="5784" b="10622">the</wd>

<space/>

</ln>

<ln l="1426" t="10714" r="5784" b="10920" baseLine="10872">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1426" t="10718" r="2107" b="10920">training</wd>

<space/>

<wd l="2203" t="10718" r="2563" b="10877">data</wd>

<space/>

<wd l="2659" t="10718" r="2938" b="10877">file</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="3038" t="10718" r="5362" b="10920">train_data_20150430.json</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="5448" t="10771" r="5784" b="10877">was</wd>

<space/>

</run>

</ln>

<ln l="1426" t="10973" r="5784" b="11174" baseLine="11126" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="10973" r="1829" b="11131">used</wd>

<space/>

<wd l="1915" t="10973" r="2170" b="11131">for</wd>

<space/>

<wd l="2251" t="11026" r="2563" b="11174">any</wd>

<space/>

<wd l="2635" t="10997" r="3523" b="11174">parameter</wd>

<space/>

<wd l="3610" t="10973" r="4382" b="11131">selection</wd>

<space/>

<wd l="4464" t="10973" r="4781" b="11131">and</wd>

<space/>

<wd l="4862" t="10973" r="5434" b="11174">design</wd>

<space/>

<wd l="5515" t="10973" r="5784" b="11131">de-</wd>

</ln>

<ln l="1430" t="11222" r="5784" b="11381" baseLine="11376" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="11222" r="2078" b="11381">cisions.</wd>

<space/>

<wd l="2227" t="11232" r="2530" b="11381">We</wd>

<space/>

<wd l="2669" t="11222" r="3072" b="11381">used</wd>

<space/>

<wd l="3216" t="11222" r="4618" b="11381">cross-validation</wd>

<space/>

<wd l="4752" t="11246" r="4915" b="11381">to</wd>

<space/>

<wd l="5064" t="11222" r="5784" b="11381">estimate</wd>

<space/>

</ln>

<ln l="1435" t="11477" r="5784" b="11678" baseLine="11630" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1435" t="11501" r="2035" b="11678">system</wd>

<space/>

<wd l="2093" t="11477" r="3254" b="11678">performance.</wd>

<space/>

<wd l="3331" t="11477" r="3662" b="11635">The</wd>

<space/>

<wd l="3734" t="11477" r="4742" b="11635">constrained</wd>

<space/>

<wd l="4810" t="11477" r="5122" b="11635">and</wd>

<space/>

<wd l="5179" t="11530" r="5784" b="11635">uncon-</wd>

</ln>

<ln l="1435" t="11731" r="5630" b="11933" baseLine="11885" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1435" t="11731" r="2122" b="11890">strained</wd>

<space/>

<wd l="2179" t="11731" r="2746" b="11890">modes</wd>

<space/>

<wd l="2808" t="11731" r="3216" b="11890">have</wd>

<space/>

<wd l="3288" t="11755" r="3994" b="11933">separate</wd>

<space/>

<wd l="4061" t="11731" r="4853" b="11890">classifier</wd>

<space/>

<wd l="4906" t="11731" r="5630" b="11933">training.</wd>

</ln>

</para>

<para l="1426" t="11986" r="5794" b="14966" alignment="justified" fli="216" lsp="exactly" lspExact="251" language="en">

<ln l="1656" t="11986" r="5784" b="12187" baseLine="12139" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1656" t="11986" r="2146" b="12144">Table</wd>

<space/>

<wd l="2266" t="11990" r="2352" b="12144">3</wd>

<space/>

<wd l="2491" t="11986" r="3019" b="12144">shows</wd>

<space/>

<wd l="3139" t="11986" r="3398" b="12144">the</wd>

<space/>

<wd l="3509" t="11986" r="4618" b="12187">performance</wd>

<space/>

<wd l="4738" t="11986" r="4939" b="12144">of</wd>

<space/>

<wd l="5026" t="11986" r="5285" b="12144">the</wd>

<space/>

<wd l="5405" t="12038" r="5784" b="12144">con-</wd>

</ln>

<ln l="1435" t="12235" r="5794" b="12394" baseLine="12389" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1435" t="12235" r="2122" b="12394">strained</wd>

<space/>

<wd l="2184" t="12235" r="2664" b="12394">mode</wd>

<space/>

<wd l="2731" t="12235" r="3120" b="12394">with</wd>

<space/>

<wd l="3187" t="12235" r="3941" b="12394">different</wd>

<space/>

<wd l="4008" t="12259" r="4320" b="12394">sets</wd>

<space/>

<wd l="4392" t="12235" r="4594" b="12394">of</wd>

<space/>

<wd l="4637" t="12235" r="5794" b="12394">classification</wd>

<space/>

</ln>

<ln l="1430" t="12485" r="5784" b="12691" baseLine="12643">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1430" t="12490" r="2112" b="12648">features</wd>

<space/>

<wd l="2184" t="12490" r="2688" b="12648">based</wd>

<space/>

<wd l="2755" t="12542" r="2971" b="12648">on</wd>

<space/>

<wd l="3038" t="12490" r="3298" b="12648">the</wd>

<space/>

<wd l="3370" t="12514" r="3677" b="12648">test</wd>

<space/>

<wd l="3744" t="12490" r="4104" b="12648">data</wd>

<space/>

<wd l="4176" t="12490" r="4459" b="12648">file</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="4531" t="12490" r="5784" b="12691">test_truth.json</wd>

<space/>

</run>

</ln>

<ln l="1430" t="12744" r="5794" b="12946" baseLine="12898" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="12744" r="2304" b="12902">concealed</wd>

<space/>

<wd l="2381" t="12744" r="2803" b="12902">from</wd>

<space/>

<wd l="2880" t="12744" r="4056" b="12946">development.</wd>

<space/>

<wd l="4142" t="12754" r="4272" b="12902">It</wd>

<space/>

<wd l="4349" t="12797" r="4651" b="12902">can</wd>

<space/>

<wd l="4714" t="12744" r="4920" b="12902">be</wd>

<space/>

<wd l="5011" t="12797" r="5390" b="12902">seen</wd>

<space/>

<wd l="5462" t="12744" r="5794" b="12902">that</wd>

<space/>

</ln>

<ln l="1426" t="12994" r="5794" b="13195" baseLine="13147" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="12994" r="1690" b="13152">the</wd>

<space/>

<wd l="1776" t="13018" r="2429" b="13195">support</wd>

<space/>

<wd l="2501" t="12994" r="2813" b="13152">and</wd>

<space/>

<wd l="2890" t="12994" r="3840" b="13152">confidence</wd>

<space/>

<wd l="3922" t="12994" r="4608" b="13152">features</wd>

<space/>

<wd l="4690" t="13046" r="4949" b="13152">are</wd>

<space/>

<wd l="5026" t="12994" r="5285" b="13152">the</wd>

<space/>

<wd l="5362" t="13018" r="5794" b="13152">most</wd>

<space/>

</ln>

<ln l="1430" t="13243" r="5794" b="13450" baseLine="13402" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="13248" r="2285" b="13450">important</wd>

<space/>

<wd l="2376" t="13248" r="2630" b="13406">for</wd>

<space/>

<wd l="2722" t="13248" r="3566" b="13450">achieving</wd>

<space/>

<wd l="3662" t="13248" r="4051" b="13450">high</wd>

<space/>

<wd l="4138" t="13253" r="4349" b="13402">F1</wd>

<space/>

<wd l="4469" t="13301" r="4968" b="13406">score.</wd>

<space/>

<wd l="5069" t="13248" r="5794" b="13406">Without</wd>

<space/>

</ln>

<ln l="1426" t="13498" r="5784" b="13704" baseLine="13656" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="13502" r="1690" b="13661">the</wd>

<space/>

<wd l="1766" t="13526" r="2419" b="13704">support</wd>

<space/>

<wd l="2486" t="13502" r="2798" b="13661">and</wd>

<space/>

<wd l="2866" t="13502" r="3821" b="13661">confidence</wd>

<space/>

<wd l="3893" t="13502" r="4627" b="13694">features,</wd>

<space/>

<wd l="4699" t="13502" r="4963" b="13661">the</wd>

<space/>

<wd l="5030" t="13507" r="5242" b="13656">F1</wd>

<space/>

<wd l="5338" t="13555" r="5784" b="13661">score</wd>

<space/>

</ln>

<ln l="1430" t="13752" r="5779" b="13954" baseLine="13906" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="13752" r="1632" b="13910">of</wd>

<space/>

<wd l="1723" t="13752" r="1987" b="13910">the</wd>

<space/>

<wd l="2112" t="13752" r="3120" b="13910">constrained</wd>

<space/>

<wd l="3235" t="13752" r="3720" b="13910">mode</wd>

<space/>

<wd l="3845" t="13752" r="4675" b="13910">decreases</wd>

<space/>

<wd l="4790" t="13752" r="5016" b="13954">by</wd>

<space/>

<wd l="5136" t="13757" r="5779" b="13910">0.0521.</wd>

<space/>

</ln>

<ln l="1430" t="14006" r="5794" b="14208" baseLine="14160" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="14006" r="1762" b="14165">The</wd>

<space/>

<wd l="1858" t="14011" r="2246" b="14165">POS</wd>

<space/>

<wd l="2347" t="14006" r="3005" b="14208">tagging</wd>

<space/>

<wd l="3101" t="14006" r="3782" b="14165">features</wd>

<space/>

<wd l="3883" t="14006" r="4728" b="14165">constitute</wd>

<space/>

<wd l="4824" t="14006" r="5088" b="14165">the</wd>

<space/>

<wd l="5189" t="14006" r="5794" b="14165">second</wd>

<space/>

</ln>

<ln l="1426" t="14261" r="5789" b="14462" baseLine="14414" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="14285" r="1858" b="14419">most</wd>

<space/>

<wd l="1925" t="14261" r="2774" b="14462">important</wd>

<space/>

<wd l="2842" t="14261" r="3442" b="14419">feature</wd>

<space/>

<wd l="3523" t="14285" r="3802" b="14419">set.</wd>

<space/>

<wd l="3878" t="14261" r="4598" b="14419">Without</wd>

<space/>

<wd l="4666" t="14266" r="5054" b="14419">POS</wd>

<space/>

<wd l="5131" t="14261" r="5789" b="14462">tagging</wd>

<space/>

</ln>

<ln l="1430" t="14515" r="5789" b="14717" baseLine="14669" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="14515" r="2165" b="14707">features,</wd>

<space/>

<wd l="2246" t="14515" r="2506" b="14674">the</wd>

<space/>

<wd l="2578" t="14520" r="2784" b="14669">F1</wd>

<space/>

<wd l="2890" t="14568" r="3336" b="14674">score</wd>

<space/>

<wd l="3413" t="14568" r="3806" b="14717">goes</wd>

<space/>

<wd l="3883" t="14515" r="4368" b="14674">down</wd>

<space/>

<wd l="4430" t="14515" r="4656" b="14717">by</wd>

<space/>

<wd l="4728" t="14520" r="5371" b="14674">0.0129.</wd>

<space/>

<wd l="5458" t="14515" r="5789" b="14674">The</wd>

<space/>

</ln>

<ln l="1435" t="14765" r="5784" b="14966" baseLine="14918" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1435" t="14765" r="1925" b="14966">string</wd>

<space/>

<wd l="2006" t="14765" r="2693" b="14923">features</wd>

<space/>

<wd l="2779" t="14818" r="3038" b="14923">are</wd>

<space/>

<wd l="3120" t="14765" r="3379" b="14923">the</wd>

<space/>

<wd l="3466" t="14765" r="3869" b="14923">least</wd>

<space/>

<wd l="3946" t="14765" r="4800" b="14966">important</wd>

<space/>

<wd l="4882" t="14789" r="5117" b="14923">set</wd>

<space/>

<wd l="5198" t="14765" r="5400" b="14923">of</wd>

<space/>

<wd l="5458" t="14765" r="5784" b="14923">fea-</wd>

</ln>

<ln l="0" t="0" r="0" b="0" baseLine="0" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<space/>

</ln>

</para>

</column>

<column l="6005" t="1429" r="10627" b="15383">

<para l="6134" t="1474" r="10498" b="1886" alignment="justified" li="72" ri="72" spaceBefore="3" spaceAfter="236" lsp="exactly" lspExact="253" language="en">

<ln l="6134" t="1474" r="10498" b="1675" baseLine="1627" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="1498" r="6552" b="1632">tures</wd>

<space/>

<wd l="6638" t="1526" r="6806" b="1632">as</wd>

<space/>

<wd l="6888" t="1474" r="7267" b="1675">they</wd>

<space/>

<wd l="7339" t="1474" r="7704" b="1632">lead</wd>

<space/>

<wd l="7771" t="1498" r="7934" b="1632">to</wd>

<space/>

<wd l="8011" t="1526" r="8405" b="1675">very</wd>

<space/>

<wd l="8472" t="1474" r="9250" b="1675">marginal</wd>

<space/>

<wd l="9331" t="1474" r="10498" b="1675">improvement</wd>

<space/>

</ln>

<ln l="6139" t="1728" r="7157" b="1886" baseLine="1882" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="1728" r="6307" b="1882">in</wd>

<space/>

<wd l="6360" t="1733" r="6571" b="1882">F1</wd>

<space/>

<wd l="6658" t="1781" r="7157" b="1886">score.</wd>

</ln>

</para>

<table l="6019" t="2189" r="10613" b="3720" alignment="left" li="14" ri="14" spaceBefore="14" spaceAfter="14">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<gridTable>

<gridCol>2098</gridCol>

<gridCol>897</gridCol>

<gridCol>720</gridCol>

<gridCol>879</gridCol>

<gridRow>197</gridRow>

<gridRow>192</gridRow>

<gridRow>379</gridRow>

<gridRow>379</gridRow>

<gridRow>384</gridRow>

</gridTable>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6019" t="2189" r="8117" b="2386" language="en">

<ln l="0" t="0" r="0" b="0" baseLine="0" fontFace="Arial" fontFamily="swiss" fontPitch="variable">

<nl orig="true"/>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="8222" t="2237" r="8851" b="2352" alignment="centered" lsp="exactly" lspExact="167" language="en">

<ln l="8222" t="2237" r="8851" b="2352" baseLine="2347" bold="true" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="8222" t="2237" r="8851" b="2352">Precision</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="9120" t="2242" r="9547" b="2352" alignment="centered" lsp="exactly" lspExact="167" language="en">

<ln l="9120" t="2242" r="9547" b="2352" baseLine="2347" bold="true" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="9120" t="2242" r="9542" b="2352">Recall</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="9840" t="2237" r="10435" b="2352" alignment="centered" lsp="exactly" lspExact="167" language="en">

<ln l="9840" t="2237" r="10435" b="2352" baseLine="2347" bold="true" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="9840" t="2237" r="10008" b="2347">F1</wd>

<space/>

<wd l="10066" t="2237" r="10435" b="2352">Score</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6139" t="2429" r="7848" b="2544" alignment="left" li="115" lsp="exactly" lspExact="191" language="en">

<ln l="6139" t="2429" r="7848" b="2544" baseLine="2539" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="2429" r="6912" b="2544">Constrained</wd>

<space/>

<wd l="6950" t="2429" r="7114" b="2544">w/</wd>

<space/>

<wd l="7152" t="2429" r="7306" b="2544">all</wd>

<space/>

<wd l="7354" t="2429" r="7848" b="2544">features</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="8227" t="2429" r="8645" b="2544" alignment="left" lsp="exactly" lspExact="191" language="en">

<tabs position="8227"/>

<ln l="8227" t="2429" r="8645" b="2544" baseLine="2539" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="8227" t="2429" r="8645" b="2544">0.9061</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="9125" t="2429" r="9552" b="2544" alignment="left" lsp="exactly" lspExact="191" language="en">

<tabs position="9125"/>

<ln l="9125" t="2429" r="9552" b="2544" baseLine="2539" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="9125" t="2429" r="9552" b="2544">0.7865</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="9845" t="2429" r="10262" b="2544" alignment="left" lsp="exactly" lspExact="191" language="en">

<tabs position="9845"/>

<ln l="9845" t="2429" r="10262" b="2544" baseLine="2539" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="9845" t="2429" r="10262" b="2544">0.8421</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6139" t="2621" r="8011" b="2923" alignment="left" li="108" lsp="exactly" lspExact="184" language="en">

<ln l="6139" t="2621" r="8011" b="2770" baseLine="2731" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="2621" r="6912" b="2736">Constrained</wd>

<space/>

<wd l="6955" t="2621" r="7195" b="2736">w/o</wd>

<space/>

<wd l="7253" t="2635" r="7733" b="2770">support</wd>

<space/>

<wd l="7781" t="2621" r="8011" b="2736">and</wd>

<space/>

</ln>

<ln l="6139" t="2808" r="7378" b="2923" baseLine="2918" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="2808" r="6835" b="2923">confidence</wd>

<space/>

<wd l="6883" t="2808" r="7378" b="2923">features</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="8227" t="2621" r="8650" b="2736" alignment="left" spaceAfter="177" lsp="exactly" lspExact="192" language="en">

<tabs position="8227"/>

<ln l="8227" t="2621" r="8650" b="2736" baseLine="2731" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="8227" t="2621" r="8650" b="2736">0.9423</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="9125" t="2621" r="9547" b="2736" alignment="left" spaceAfter="177" lsp="exactly" lspExact="192" language="en">

<tabs position="9125"/>

<ln l="9125" t="2621" r="9547" b="2736" baseLine="2731" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="9125" t="2621" r="9547" b="2736">0.6803</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="9845" t="2621" r="10262" b="2736" alignment="left" spaceAfter="177" lsp="exactly" lspExact="192" language="en">

<tabs position="9845"/>

<ln l="9845" t="2621" r="10262" b="2736" baseLine="2731" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="9845" t="2621" r="10262" b="2736">0.7901</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6139" t="3000" r="8002" b="3331" alignment="left" li="108" lsp="exactly" lspExact="183" language="en">

<ln l="6139" t="3000" r="8002" b="3149" baseLine="3110" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="3000" r="6912" b="3115">Constrained</wd>

<space/>

<wd l="7013" t="3000" r="7253" b="3115">w/o</wd>

<space/>

<wd l="7358" t="3000" r="7646" b="3115">POS</wd>

<space/>

<wd l="7757" t="3014" r="8002" b="3149">tag-</wd>

<space/>

</ln>

<ln l="6139" t="3187" r="6960" b="3331" baseLine="3298" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="3187" r="6418" b="3331">ging</wd>

<space/>

<wd l="6466" t="3187" r="6960" b="3302">features</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="8227" t="3000" r="8578" b="3115" alignment="left" spaceAfter="172" lsp="exactly" lspExact="192" language="en">

<tabs position="8227"/>

<ln l="8227" t="3000" r="8578" b="3115" baseLine="3110" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="8227" t="3000" r="8578" b="3115">0.902</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="9125" t="3000" r="9547" b="3115" alignment="left" spaceAfter="172" lsp="exactly" lspExact="192" language="en">

<tabs position="9125"/>

<ln l="9125" t="3000" r="9547" b="3115" baseLine="3110" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="9125" t="3000" r="9547" b="3115">0.7673</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="9845" t="3000" r="10277" b="3115" alignment="left" spaceAfter="172" lsp="exactly" lspExact="192" language="en">

<tabs position="9845"/>

<ln l="9845" t="3000" r="10277" b="3115" baseLine="3110" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="9845" t="3000" r="10277" b="3115">0.8292</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6134" t="3379" r="8002" b="3677" alignment="left" li="108" lsp="exactly" lspExact="181" language="en">

<ln l="6139" t="3379" r="8002" b="3528" baseLine="3490" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="3379" r="6912" b="3494">Constrained</wd>

<space/>

<wd l="6989" t="3379" r="7229" b="3494">w/o</wd>

<space/>

<wd l="7315" t="3379" r="7675" b="3528">string</wd>

<space/>

<wd l="7762" t="3379" r="8002" b="3494">fea-</wd>

<space/>

</ln>

<ln l="6134" t="3571" r="6442" b="3677" baseLine="3672" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="3576" r="6442" b="3677">tures</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="8227" t="3379" r="8659" b="3494" alignment="left" spaceAfter="181" lsp="exactly" lspExact="192" language="en">

<tabs position="8227"/>

<ln l="8227" t="3379" r="8659" b="3494" baseLine="3490" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="8227" t="3379" r="8659" b="3494">0.9102</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="9125" t="3379" r="9552" b="3494" alignment="left" spaceAfter="181" lsp="exactly" lspExact="192" language="en">

<tabs position="9125"/>

<ln l="9125" t="3379" r="9552" b="3494" baseLine="3490" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="9125" t="3379" r="9552" b="3494">0.7825</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="9845" t="3379" r="10277" b="3494" alignment="left" spaceAfter="181" lsp="exactly" lspExact="192" language="en">

<tabs position="9845"/>

<ln l="9845" t="3379" r="10277" b="3494" baseLine="3490" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="9845" t="3379" r="10277" b="3494">0.8416</wd>

</ln>

</para>

</cell>

</table>

<para l="6139" t="3758" r="9706" b="3922" alignment="left" li="72" ri="72" lsp="exactly" lspExact="195" language="en">

<ln l="6139" t="3758" r="9706" b="3922" baseLine="3883" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="3763" r="6571" b="3888">Table</wd>

<space/>

<wd l="6619" t="3763" r="6758" b="3888">3:</wd>

<space/>

<wd l="6816" t="3763" r="7714" b="3922">Importance</wd>

<space/>

<wd l="7766" t="3758" r="7925" b="3888">of</wd>

<space/>

<wd l="7958" t="3758" r="8990" b="3888">Classification</wd>

<space/>

<wd l="9038" t="3763" r="9706" b="3888">Features</wd>

</ln>

</para>

<para l="6130" t="4171" r="10493" b="5136" alignment="justified" li="72" ri="72" spaceBefore="203" fli="288" lsp="exactly" lspExact="252" language="en">

<ln l="6365" t="4171" r="10493" b="4373" baseLine="4325" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="8">

<wd l="6365" t="4181" r="6547" b="4325">In</wd>

<space/>

<wd l="6658" t="4171" r="7147" b="4330">Table</wd>

<space/>

<wd l="7258" t="4176" r="7411" b="4363">4,</wd>

<space/>

<wd l="7531" t="4224" r="7781" b="4330">we</wd>

<space/>

<wd l="7896" t="4195" r="8424" b="4373">report</wd>

<space/>

<wd l="8530" t="4171" r="8789" b="4330">the</wd>

<space/>

<wd l="8909" t="4171" r="9821" b="4330">evaluation</wd>

<space/>

<wd l="9926" t="4171" r="10493" b="4330">results</wd>

<space/>

</ln>

<ln l="6130" t="4426" r="10493" b="4627" baseLine="4579">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="8"><wd l="6130" t="4426" r="6638" b="4584">based</wd>

<space/>

<wd l="6749" t="4478" r="6965" b="4584">on</wd>

<space/>

<wd l="7070" t="4426" r="7334" b="4584">the</wd>

<space/>

<wd l="7450" t="4450" r="7757" b="4584">test</wd>

<space/>

<wd l="7867" t="4426" r="8227" b="4584">data</wd>

<space/>

<wd l="8342" t="4426" r="8621" b="4584">file</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="8"><wd l="8741" t="4426" r="9994" b="4627">test_truth.json</wd>

<space/>

</run>

<wd l="10114" t="4478" r="10493" b="4584" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="8">con-</wd>

</ln>

<ln l="6139" t="4680" r="10488" b="4882" baseLine="4834" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="8">

<wd l="6139" t="4680" r="6696" b="4838">cealed</wd>

<space/>

<wd l="6768" t="4680" r="7186" b="4838">from</wd>

<space/>

<wd l="7258" t="4680" r="8434" b="4882">development.</wd>

<space/>

<wd l="8510" t="4690" r="8818" b="4838">For</wd>

<space/>

<wd l="8885" t="4680" r="9893" b="4838">constrained</wd>

<space/>

<wd l="9955" t="4680" r="10488" b="4872">mode,</wd>

<space/>

</ln>

<ln l="6134" t="4934" r="10493" b="5136" baseLine="5088" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="8">

<wd l="6134" t="4987" r="6384" b="5093">we</wd>

<space/>

<wd l="6658" t="4934" r="6926" b="5093">list</wd>

<space/>

<wd l="7186" t="4934" r="7450" b="5093">the</wd>

<space/>

<wd l="7718" t="4958" r="8395" b="5136">top-two</wd>

<space/>

<wd l="8664" t="4934" r="9230" b="5093">results</wd>

<space/>

<wd l="9499" t="4934" r="9725" b="5136">by</wd>

<space/>

<wd l="9989" t="4958" r="10493" b="5093">teams</wd>

</ln>

</para>

<para l="6130" t="5184" r="10502" b="6653" alignment="justified" li="72" ri="72" spaceAfter="222" lsp="exactly" lspExact="253" language="en">

<tabs position="6130"/>

<tabs alignment="right" position="6653" leaderChar=" "/>

<ln l="6130" t="5184" r="10502" b="5386" baseLine="5338" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="6130" t="5189" r="7891" b="5386">NCSU_SAS_NING</wd>

<tab position="7891"/>

<wd l="8587" t="5184" r="9490" b="5386">(Ning.cm)</wd>

<tab position="9490"/>

<wd l="10186" t="5184" r="10502" b="5342">and</wd>

<space/>

</ln>

<ln l="6130" t="5438" r="10502" b="5640" baseLine="5592" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="6130" t="5443" r="8453" b="5640">NCSU_SAS_WOOKHEE</wd>

<space/>

<wd l="8645" t="5438" r="9998" b="5640">(Wookhee.cm).</wd>

<space/>

<wd l="10195" t="5448" r="10502" b="5597">For</wd>

<space/>

</ln>

<ln l="6134" t="5693" r="10502" b="5894" baseLine="5846" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="6134" t="5693" r="7368" b="5851">unconstrained</wd>

<space/>

<wd l="7483" t="5693" r="8016" b="5885">mode,</wd>

<space/>

<wd l="8141" t="5746" r="8395" b="5851">we</wd>

<space/>

<wd l="8520" t="5693" r="8784" b="5851">list</wd>

<space/>

<wd l="8899" t="5693" r="9158" b="5851">the</wd>

<space/>

<wd l="9283" t="5717" r="9552" b="5894">top</wd>

<space/>

<wd l="9677" t="5693" r="10166" b="5851">result</wd>

<space/>

<wd l="10277" t="5693" r="10502" b="5894">by</wd>

<space/>

</ln>

<ln l="6134" t="5942" r="10502" b="6144" baseLine="6096" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="6134" t="5966" r="6562" b="6101">team</wd>

<space/>

<wd l="6672" t="5947" r="7426" b="6144">IHS_RD</wd>

<space/>

<wd l="7546" t="5942" r="8784" b="6144">(IHS_RD.um)</wd>

<space/>

<wd l="8899" t="5942" r="9211" b="6101">and</wd>

<space/>

<wd l="9317" t="5942" r="9581" b="6101">the</wd>

<space/>

<wd l="9686" t="5942" r="10176" b="6101">result</wd>

<space/>

<wd l="10277" t="5942" r="10502" b="6144">by</wd>

<space/>

</ln>

<ln l="6139" t="6197" r="10502" b="6398" baseLine="6350" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="6139" t="6250" r="6432" b="6355">our</wd>

<space/>

<wd l="6523" t="6250" r="6898" b="6355">own</wd>

<space/>

<wd l="6989" t="6197" r="8222" b="6355">unconstrained</wd>

<space/>

<wd l="8314" t="6197" r="8798" b="6355">mode</wd>

<space/>

<wd l="8899" t="6197" r="9859" b="6398">(Ning.um),</wd>

<space/>

<wd l="9965" t="6197" r="10502" b="6355">which</wd>

<space/>

</ln>

<ln l="6134" t="6451" r="9946" b="6653" baseLine="6605" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="6134" t="6504" r="6470" b="6610">was</wd>

<space/>

<wd l="6538" t="6451" r="7435" b="6653">developed</wd>

<space/>

<wd l="7498" t="6451" r="7901" b="6610">after</wd>

<space/>

<wd l="7949" t="6451" r="8213" b="6610">the</wd>

<space/>

<wd l="8280" t="6451" r="9326" b="6653">competition</wd>

<space/>

<wd l="9384" t="6451" r="9946" b="6610">ended.</wd>

</ln>

</para>

<table l="6019" t="6907" r="10613" b="7886" alignment="left" li="14" ri="14" spaceBefore="14" spaceAfter="14">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<gridTable>

<gridCol>835</gridCol>

<gridCol>811</gridCol>

<gridCol>1080</gridCol>

<gridCol>812</gridCol>

<gridCol>1056</gridCol>

<gridRow>202</gridRow>

<gridRow>192</gridRow>

<gridRow>192</gridRow>

<gridRow>196</gridRow>

<gridRow>197</gridRow>

</gridTable>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="0" gridRowTill="1" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6134" t="6950" r="6581" b="7258" alignment="left" li="108" spaceAfter="3" lsp="exactly" lspExact="187" language="en">

<ln l="6134" t="6950" r="6581" b="7070" baseLine="7066" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="6955" r="6581" b="7070">Perfor-</wd>

<space/>

</ln>

<ln l="6134" t="7142" r="6547" b="7258" baseLine="7253" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="7176" r="6547" b="7258">mance</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="2" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6965" t="6955" r="8146" b="7070" alignment="left" li="101" lsp="exactly" lspExact="182" language="en">

<ln l="6965" t="6955" r="8146" b="7070" baseLine="7066" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6965" t="6955" r="7738" b="7070">Constrained</wd>

<space/>

<wd l="7776" t="6955" r="8146" b="7070">Mode</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="4" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="8851" t="6955" r="10195" b="7070" alignment="left" li="101" lsp="exactly" lspExact="182" language="en">

<ln l="8851" t="6955" r="10195" b="7070" baseLine="7066" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="8851" t="6955" r="9787" b="7070">Unconstrained</wd>

<space/>

<wd l="9826" t="6955" r="10195" b="7070">Mode</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6955" t="7147" r="7517" b="7296" alignment="left" li="101" lsp="exactly" lspExact="191" language="en">

<ln l="6955" t="7147" r="7517" b="7296" baseLine="7262" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6955" t="7152" r="7517" b="7296">Ning.cm</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="7771" t="7147" r="8621" b="7267" alignment="centered" lsp="exactly" lspExact="191" language="en">

<ln l="7771" t="7147" r="8621" b="7267" baseLine="7262" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="7771" t="7152" r="8621" b="7267">Wookhee.cm</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="8846" t="7147" r="9418" b="7301" alignment="left" li="101" lsp="exactly" lspExact="191" language="en">

<ln l="8846" t="7147" r="9418" b="7301" baseLine="7262" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="8846" t="7152" r="9418" b="7301">Ning.um</wd>

</ln>

</para>

</cell>

<cell gridColFrom="4" gridColTill="4" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="9662" t="7147" r="10469" b="7267" alignment="centered" lsp="exactly" lspExact="191" language="en">

<ln l="9662" t="7147" r="10469" b="7267" baseLine="7262" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="4">

<wd l="9662" t="7152" r="10469" b="7267">IHS_RD.um</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6134" t="7339" r="6734" b="7459" alignment="left" li="115" lsp="exactly" lspExact="187" language="en">

<ln l="6134" t="7339" r="6734" b="7459" baseLine="7454" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="7344" r="6734" b="7459">Precision</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6965" t="7339" r="7382" b="7459" alignment="left" lsp="exactly" lspExact="187" language="en">

<tabs position="6965"/>

<ln l="6965" t="7339" r="7382" b="7459" baseLine="7454" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6965" t="7344" r="7382" b="7459">0.9061</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="7776" t="7344" r="8208" b="7459" alignment="left" lsp="exactly" lspExact="187" language="en">

<tabs position="7776"/>

<ln l="7776" t="7344" r="8208" b="7459" baseLine="7454" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="7776" t="7344" r="8208" b="7459">0.9136</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="8856" t="7344" r="9288" b="7459" alignment="left" lsp="exactly" lspExact="172" language="en">

<tabs position="8856"/>

<ln l="8856" t="7344" r="9288" b="7459" baseLine="7454" bold="true" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="8856" t="7344" r="9288" b="7459">0.9339</wd>

</ln>

</para>

</cell>

<cell gridColFrom="4" gridColTill="4" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="9667" t="7344" r="10099" b="7459" alignment="left" lsp="exactly" lspExact="187" language="en">

<tabs position="9667"/>

<ln l="9667" t="7344" r="10099" b="7459" baseLine="7454" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="9667" t="7344" r="10099" b="7459">0.8469</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6134" t="7536" r="6542" b="7656" alignment="left" li="115" lsp="exactly" lspExact="178" language="en">

<ln l="6134" t="7536" r="6542" b="7656" baseLine="7651" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="7541" r="6542" b="7656">Recall</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6965" t="7541" r="7392" b="7656" alignment="left" lsp="exactly" lspExact="178" language="en">

<tabs position="6965"/>

<ln l="6965" t="7541" r="7392" b="7656" baseLine="7651" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6965" t="7541" r="7392" b="7656">0.7865</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="7776" t="7541" r="8203" b="7656" alignment="left" lsp="exactly" lspExact="178" language="en">

<tabs position="7776"/>

<ln l="7776" t="7541" r="8203" b="7656" baseLine="7651" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="7776" t="7541" r="8203" b="7656">0.7398</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="8856" t="7536" r="9288" b="7656" alignment="left" lsp="exactly" lspExact="178" language="en">

<tabs position="8856"/>

<ln l="8856" t="7536" r="9288" b="7656" baseLine="7651" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="8856" t="7541" r="9288" b="7656">0.7582</wd>

</ln>

</para>

</cell>

<cell gridColFrom="4" gridColTill="4" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="9667" t="7541" r="10094" b="7656" alignment="left" lsp="exactly" lspExact="162" language="en">

<tabs position="9667"/>

<ln l="9667" t="7541" r="10094" b="7656" baseLine="7651" bold="true" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="9667" t="7541" r="10094" b="7656">0.8083</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6134" t="7728" r="6706" b="7848" alignment="left" li="115" lsp="exactly" lspExact="182" language="en">

<ln l="6134" t="7728" r="6706" b="7848" baseLine="7843" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="7733" r="6288" b="7843">F1</wd>

<space/>

<wd l="6355" t="7733" r="6706" b="7848">Score</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6965" t="7728" r="7382" b="7848" alignment="left" lsp="exactly" lspExact="171" language="en">

<tabs position="6965"/>

<ln l="6965" t="7728" r="7382" b="7848" baseLine="7843" bold="true" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6965" t="7733" r="7382" b="7848">0.8421</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="7776" t="7733" r="8203" b="7848" alignment="left" lsp="exactly" lspExact="182" language="en">

<tabs position="7776"/>

<ln l="7776" t="7733" r="8203" b="7848" baseLine="7843" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="7776" t="7733" r="8203" b="7848">0.8175</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="8856" t="7733" r="9206" b="7848" alignment="left" lsp="exactly" lspExact="182" language="en">

<tabs position="8856"/>

<ln l="8856" t="7733" r="9206" b="7848" baseLine="7843" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="8856" t="7733" r="9206" b="7848">0.837</wd>

</ln>

</para>

</cell>

<cell gridColFrom="4" gridColTill="4" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="9667" t="7728" r="10099" b="7848" alignment="left" lsp="exactly" lspExact="182" language="en">

<tabs position="9667"/>

<ln l="9667" t="7728" r="10099" b="7848" baseLine="7843" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="9667" t="7733" r="10099" b="7848">0.8272</wd>

</ln>

</para>

</cell>

</table>

<para l="6139" t="7925" r="9264" b="8088" alignment="left" li="72" ri="72" lsp="exactly" lspExact="196" language="en">

<ln l="6139" t="7925" r="9264" b="8088" baseLine="8050" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="7930" r="6571" b="8054">Table</wd>

<space/>

<wd l="6619" t="7930" r="6758" b="8054">4:</wd>

<space/>

<wd l="6821" t="7925" r="7771" b="8088">Competition</wd>

<space/>

<wd l="7819" t="7925" r="8654" b="8054">Evaluation</wd>

<space/>

<wd l="8707" t="7930" r="9264" b="8054">Results</wd>

</ln>

</para>

<para l="6130" t="8342" r="10502" b="11582" alignment="justified" li="72" ri="72" spaceBefore="205" lsp="exactly" lspExact="252" language="en">

<ln l="6139" t="8342" r="10488" b="8544" baseLine="8496" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="8352" r="6269" b="8501">It</wd>

<space/>

<wd l="6346" t="8395" r="6643" b="8501">can</wd>

<space/>

<wd l="6715" t="8342" r="6922" b="8501">be</wd>

<space/>

<wd l="7008" t="8395" r="7392" b="8501">seen</wd>

<space/>

<wd l="7464" t="8342" r="7795" b="8501">that</wd>

<space/>

<wd l="7867" t="8395" r="8160" b="8501">our</wd>

<space/>

<wd l="8227" t="8342" r="9451" b="8501">normalization</wd>

<space/>

<wd l="9533" t="8366" r="10133" b="8544">system</wd>

<space/>

<wd l="10205" t="8342" r="10488" b="8501">has</wd>

<space/>

</ln>

<ln l="6134" t="8597" r="10498" b="8755" baseLine="8750" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="8597" r="6398" b="8755">the</wd>

<space/>

<wd l="6480" t="8597" r="6840" b="8755">best</wd>

<space/>

<wd l="6922" t="8602" r="7128" b="8750">F1</wd>

<space/>

<wd l="7248" t="8650" r="7694" b="8755">score</wd>

<space/>

<wd l="7790" t="8597" r="7958" b="8750">in</wd>

<space/>

<wd l="8035" t="8597" r="8429" b="8755">both</wd>

<space/>

<wd l="8520" t="8597" r="9528" b="8755">constrained</wd>

<space/>

<wd l="9610" t="8597" r="10094" b="8755">mode</wd>

<space/>

<wd l="10186" t="8597" r="10498" b="8755">and</wd>

<space/>

</ln>

<ln l="6134" t="8851" r="10502" b="9043" baseLine="9005" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="8851" r="7368" b="9010">unconstrained</wd>

<space/>

<wd l="7512" t="8851" r="8045" b="9010">mode.</wd>

<space/>

<wd l="8203" t="8861" r="8381" b="9005">In</wd>

<space/>

<wd l="8530" t="8851" r="8899" b="9043">fact,</wd>

<space/>

<wd l="9058" t="8904" r="9346" b="9010">our</wd>

<space/>

<wd l="9490" t="8851" r="10502" b="9010">constrained</wd>

<space/>

</ln>

<ln l="6134" t="9101" r="10502" b="9293" baseLine="9254" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="9101" r="6619" b="9259">mode</wd>

<space/>

<wd l="6715" t="9101" r="6998" b="9259">has</wd>

<space/>

<wd l="7094" t="9101" r="7358" b="9259">the</wd>

<space/>

<wd l="7450" t="9101" r="7810" b="9259">best</wd>

<space/>

<wd l="7901" t="9106" r="8102" b="9254">F1</wd>

<space/>

<wd l="8232" t="9154" r="8678" b="9259">score</wd>

<space/>

<wd l="8779" t="9101" r="9432" b="9293">overall,</wd>

<space/>

<wd l="9528" t="9101" r="10037" b="9259">better</wd>

<space/>

<wd l="10123" t="9101" r="10502" b="9259">than</wd>

<space/>

</ln>

<ln l="6139" t="9355" r="10493" b="9547" baseLine="9509" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="9408" r="6432" b="9514">our</wd>

<space/>

<wd l="6490" t="9355" r="7728" b="9514">unconstrained</wd>

<space/>

<wd l="7790" t="9355" r="8318" b="9547">mode,</wd>

<space/>

<wd l="8395" t="9355" r="8933" b="9514">which</wd>

<space/>

<wd l="9005" t="9408" r="9523" b="9514">seems</wd>

<space/>

<wd l="9600" t="9355" r="10493" b="9514">counterin-</wd>

</ln>

<ln l="6134" t="9610" r="10493" b="9802" baseLine="9763" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="9610" r="6739" b="9768">tuitive.</wd>

<space/>

<wd l="6850" t="9610" r="7579" b="9802">Besides,</wd>

<space/>

<wd l="7690" t="9610" r="7949" b="9768">the</wd>

<space/>

<wd l="8054" t="9610" r="9288" b="9768">unconstrained</wd>

<space/>

<wd l="9389" t="9610" r="9869" b="9768">mode</wd>

<space/>

<wd l="9979" t="9610" r="10114" b="9768">is</wd>

<space/>

<wd l="10224" t="9662" r="10493" b="9768">ex-</wd>

</ln>

<ln l="6130" t="9859" r="10493" b="10061" baseLine="10013" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="9859" r="6710" b="10061">pected</wd>

<space/>

<wd l="6830" t="9883" r="6994" b="10018">to</wd>

<space/>

<wd l="7133" t="9859" r="7790" b="10018">achieve</wd>

<space/>

<wd l="7920" t="9859" r="8486" b="10061">higher</wd>

<space/>

<wd l="8606" t="9859" r="9091" b="10018">recall</wd>

<space/>

<wd l="9216" t="9859" r="9590" b="10018">than</wd>

<space/>

<wd l="9720" t="9859" r="9979" b="10018">the</wd>

<space/>

<wd l="10114" t="9912" r="10493" b="10018">con-</wd>

</ln>

<ln l="6144" t="10114" r="10493" b="10315" baseLine="10267" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="10114" r="6830" b="10272">strained</wd>

<space/>

<wd l="6941" t="10114" r="7421" b="10272">mode</wd>

<space/>

<wd l="7531" t="10114" r="8222" b="10272">because</wd>

<space/>

<wd l="8342" t="10114" r="8544" b="10272">of</wd>

<space/>

<wd l="8630" t="10114" r="8827" b="10272">its</wd>

<space/>

<wd l="8942" t="10114" r="9432" b="10272">much</wd>

<space/>

<wd l="9542" t="10114" r="10056" b="10315">larger</wd>

<space/>

<wd l="10162" t="10114" r="10493" b="10272">dic-</wd>

</ln>

<ln l="6134" t="10368" r="10493" b="10570" baseLine="10522" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="10368" r="6802" b="10570">tionary,</wd>

<space/>

<wd l="6893" t="10368" r="7181" b="10526">but</wd>

<space/>

<wd l="7262" t="10368" r="7522" b="10526">the</wd>

<space/>

<wd l="7618" t="10368" r="8530" b="10526">evaluation</wd>

<space/>

<wd l="8611" t="10368" r="9178" b="10526">results</wd>

<space/>

<wd l="9278" t="10368" r="9734" b="10526">show</wd>

<space/>

<wd l="9816" t="10368" r="10147" b="10526">that</wd>

<space/>

<wd l="10234" t="10368" r="10493" b="10526">the</wd>

<space/>

</ln>

<ln l="6134" t="10618" r="10502" b="10819" baseLine="10771" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="10618" r="7368" b="10776">unconstrained</wd>

<space/>

<wd l="7450" t="10618" r="7930" b="10776">mode</wd>

<space/>

<wd l="8016" t="10618" r="8304" b="10776">has</wd>

<space/>

<wd l="8395" t="10618" r="8894" b="10776">lower</wd>

<space/>

<wd l="8971" t="10618" r="9456" b="10776">recall</wd>

<space/>

<wd l="9547" t="10618" r="9859" b="10776">and</wd>

<space/>

<wd l="9941" t="10618" r="10502" b="10819">higher</wd>

<space/>

</ln>

<ln l="6130" t="10872" r="10493" b="11074" baseLine="11026" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="10872" r="6941" b="11074">precision</wd>

<space/>

<wd l="7003" t="10872" r="7378" b="11030">than</wd>

<space/>

<wd l="7440" t="10872" r="7704" b="11030">the</wd>

<space/>

<wd l="7776" t="10872" r="8784" b="11030">constrained</wd>

<space/>

<wd l="8846" t="10872" r="9379" b="11030">mode.</wd>

<space/>

<wd l="9456" t="10872" r="9787" b="11030">The</wd>

<space/>

<wd l="9859" t="10872" r="10493" b="11030">follow-</wd>

</ln>

<ln l="6139" t="11126" r="10502" b="11328" baseLine="11280" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6139" t="11126" r="6413" b="11328">ing</wd>

<space/>

<wd l="6480" t="11126" r="6912" b="11285">three</wd>

<space/>

<wd l="6984" t="11126" r="7570" b="11285">factors</wd>

<space/>

<wd l="7646" t="11126" r="8011" b="11285">lead</wd>

<space/>

<wd l="8074" t="11150" r="8237" b="11285">to</wd>

<space/>

<wd l="8309" t="11126" r="8568" b="11285">the</wd>

<space/>

<wd l="8645" t="11126" r="9302" b="11285">inferior</wd>

<space/>

<wd l="9360" t="11131" r="9571" b="11280">F1</wd>

<space/>

<wd l="9667" t="11179" r="10114" b="11285">score</wd>

<space/>

<wd l="10186" t="11126" r="10502" b="11285">and</wd>

<space/>

</ln>

<ln l="6134" t="11381" r="9125" b="11582" baseLine="11534" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="11381" r="6619" b="11539">recall</wd>

<space/>

<wd l="6672" t="11381" r="6898" b="11582">by</wd>

<space/>

<wd l="6960" t="11434" r="7248" b="11539">our</wd>

<space/>

<wd l="7301" t="11381" r="8534" b="11539">unconstrained</wd>

<space/>

<wd l="8592" t="11381" r="9125" b="11539">mode:</wd>

</ln>

</para>

<para l="6134" t="11630" r="10502" b="14362" alignment="justified" li="72" ri="72" fli="288" lsp="exactly" lspExact="252" language="en">

<ln l="6365" t="11630" r="10502" b="11832" baseLine="11784" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6365" t="11630" r="6696" b="11789">The</wd>

<space/>

<wd l="6830" t="11630" r="7320" b="11789">much</wd>

<space/>

<wd l="7450" t="11630" r="7963" b="11832">larger</wd>

<space/>

<wd l="8088" t="11630" r="8923" b="11789">canonical</wd>

<space/>

<wd l="9058" t="11630" r="9480" b="11789">form</wd>

<space/>

<wd l="9614" t="11630" r="10502" b="11832">dictionary</wd>

<space/>

</ln>

<ln l="6134" t="11885" r="10502" b="12086" baseLine="12038" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6134" t="11885" r="6538" b="12043">used</wd>

<space/>

<wd l="6624" t="11885" r="6845" b="12086">by</wd>

<space/>

<wd l="6936" t="11885" r="7195" b="12043">the</wd>

<space/>

<wd l="7291" t="11885" r="8525" b="12043">unconstrained</wd>

<space/>

<wd l="8611" t="11885" r="9096" b="12043">mode</wd>

<space/>

<wd l="9197" t="11885" r="9917" b="12043">contains</wd>

<space/>

<wd l="10013" t="11938" r="10502" b="12086">many</wd>

<space/>

</ln>

<ln l="6134" t="12139" r="10493" b="12341" baseLine="12293" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6134" t="12139" r="6648" b="12341">rarely</wd>

<space/>

<wd l="6710" t="12139" r="7114" b="12298">used</wd>

<space/>

<wd l="7176" t="12139" r="7704" b="12298">words</wd>

<space/>

<wd l="7776" t="12139" r="8093" b="12298">and</wd>

<space/>

<wd l="8150" t="12139" r="8746" b="12341">having</wd>

<space/>

<wd l="8822" t="12139" r="9216" b="12298">such</wd>

<space/>

<wd l="9278" t="12139" r="9806" b="12298">words</wd>

<space/>

<wd l="9883" t="12192" r="10051" b="12298">as</wd>

<space/>

<wd l="10128" t="12192" r="10493" b="12298">can-</wd>

</ln>

<ln l="6139" t="12389" r="10493" b="12590" baseLine="12542" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6139" t="12389" r="6749" b="12547">didates</wd>

<space/>

<wd l="6859" t="12442" r="7416" b="12547">causes</wd>

<space/>

<wd l="7522" t="12389" r="7781" b="12547">the</wd>

<space/>

<wd l="7886" t="12389" r="8722" b="12547">candidate</wd>

<space/>

<wd l="8822" t="12389" r="9734" b="12547">evaluation</wd>

<space/>

<wd l="9835" t="12442" r="10493" b="12590">compo-</wd>

</ln>

<ln l="6134" t="12643" r="10493" b="12845" baseLine="12797" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6134" t="12667" r="6514" b="12802">nent</wd>

<space/>

<wd l="6590" t="12667" r="6754" b="12802">to</wd>

<space/>

<wd l="6835" t="12643" r="7042" b="12802">be</wd>

<space/>

<wd l="7123" t="12696" r="7570" b="12802">more</wd>

<space/>

<wd l="7661" t="12643" r="8760" b="12802">conservative</wd>

<space/>

<wd l="8846" t="12643" r="9014" b="12797">in</wd>

<space/>

<wd l="9101" t="12643" r="9869" b="12845">selecting</wd>

<space/>

<wd l="9955" t="12643" r="10493" b="12802">candi-</wd>

</ln>

<ln l="6139" t="12898" r="10493" b="13099" baseLine="13051" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6139" t="12898" r="6581" b="13056">dates</wd>

<space/>

<wd l="6648" t="12898" r="7099" b="13056">other</wd>

<space/>

<wd l="7157" t="12898" r="7536" b="13056">than</wd>

<space/>

<wd l="7594" t="12898" r="7853" b="13056">the</wd>

<space/>

<wd l="7925" t="12898" r="8602" b="13099">original</wd>

<space/>

<wd l="8664" t="12898" r="9226" b="13056">tokens</wd>

<space/>

<wd l="9298" t="12898" r="9931" b="13099">(higher</wd>

<space/>

<wd l="9984" t="12898" r="10493" b="13099">preci-</wd>

</ln>

<ln l="6144" t="13152" r="10493" b="13354" baseLine="13306" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6144" t="13152" r="6504" b="13310">sion</wd>

<space/>

<wd l="6581" t="13152" r="6898" b="13310">and</wd>

<space/>

<wd l="6974" t="13152" r="7474" b="13310">lower</wd>

<space/>

<wd l="7550" t="13152" r="8155" b="13354">recall).</wd>

<space/>

<wd l="8242" t="13157" r="8395" b="13306">A</wd>

<space/>

<wd l="8472" t="13152" r="9240" b="13354">potential</wd>

<space/>

<wd l="9331" t="13152" r="10032" b="13310">solution</wd>

<space/>

<wd l="10114" t="13152" r="10243" b="13310">is</wd>

<space/>

<wd l="10330" t="13176" r="10493" b="13310">to</wd>

<space/>

</ln>

<ln l="6134" t="13402" r="10502" b="13603" baseLine="13555" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6134" t="13454" r="6422" b="13560">use</wd>

<space/>

<wd l="6509" t="13454" r="6600" b="13560">a</wd>

<space/>

<wd l="6682" t="13402" r="7325" b="13560">smaller</wd>

<space/>

<wd l="7402" t="13402" r="8290" b="13603">dictionary</wd>

<space/>

<wd l="8366" t="13402" r="8568" b="13560">of</wd>

<space/>

<wd l="8616" t="13426" r="9048" b="13560">most</wd>

<space/>

<wd l="9125" t="13402" r="10022" b="13603">frequently</wd>

<space/>

<wd l="10099" t="13402" r="10502" b="13560">used</wd>

<space/>

</ln>

<ln l="6134" t="13656" r="10498" b="13858" baseLine="13810" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6134" t="13656" r="6662" b="13814">words</wd>

<space/>

<wd l="6773" t="13656" r="7392" b="13814">instead</wd>

<space/>

<wd l="7488" t="13656" r="7690" b="13814">of</wd>

<space/>

<wd l="7766" t="13709" r="7858" b="13814">a</wd>

<space/>

<wd l="7958" t="13656" r="8386" b="13858">large</wd>

<space/>

<wd l="8491" t="13656" r="9379" b="13858">dictionary</wd>

<space/>

<wd l="9480" t="13709" r="9662" b="13814">or</wd>

<space/>

<wd l="9754" t="13680" r="9912" b="13814">to</wd>

<space/>

<wd l="10018" t="13709" r="10306" b="13814">use</wd>

<space/>

<wd l="10406" t="13709" r="10498" b="13814">a</wd>

<space/>

</ln>

<ln l="6139" t="13910" r="10493" b="14112" baseLine="14064" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6139" t="13910" r="7027" b="14112">dictionary</wd>

<space/>

<wd l="7099" t="13910" r="7488" b="14069">with</wd>

<space/>

<wd l="7560" t="13910" r="8011" b="14069">word</wd>

<space/>

<wd l="8088" t="13910" r="8962" b="14112">frequency</wd>

<space/>

<wd l="9024" t="13910" r="9533" b="14069">based</wd>

<space/>

<wd l="9610" t="13963" r="9821" b="14069">on</wd>

<space/>

<wd l="9898" t="13963" r="9989" b="14069">a</wd>

<space/>

<wd l="10066" t="13910" r="10493" b="14112">large</wd>

<space/>

</ln>

<ln l="6139" t="14170" r="6763" b="14362" baseLine="14314" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6139" t="14213" r="6763" b="14362">corpus.</wd>

</ln>

</para>

<para l="6134" t="14414" r="10498" b="15374" alignment="justified" li="72" ri="72" fli="288" lsp="exactly" lspExact="252" language="en">

<ln l="6360" t="14414" r="10493" b="14606" baseLine="14568" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="3">

<wd l="6360" t="14424" r="6816" b="14573">Even</wd>

<space/>

<wd l="6907" t="14414" r="7061" b="14568">if</wd>

<space/>

<wd l="7123" t="14467" r="7373" b="14573">we</wd>

<space/>

<wd l="7474" t="14414" r="8146" b="14573">exclude</wd>

<space/>

<wd l="8237" t="14414" r="8501" b="14573">the</wd>

<space/>

<wd l="8597" t="14467" r="8928" b="14573">rare</wd>

<space/>

<wd l="9024" t="14414" r="9605" b="14606">words,</wd>

<space/>

<wd l="9706" t="14414" r="9965" b="14573">the</wd>

<space/>

<wd l="10061" t="14467" r="10493" b="14573">mere</wd>

<space/>

</ln>

<ln l="6139" t="14669" r="10498" b="14870" baseLine="14822" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="3">

<wd l="6139" t="14669" r="6850" b="14827">increase</wd>

<space/>

<wd l="7013" t="14669" r="7181" b="14822">in</wd>

<space/>

<wd l="7334" t="14669" r="8006" b="14827">number</wd>

<space/>

<wd l="8165" t="14669" r="8366" b="14827">of</wd>

<space/>

<wd l="8501" t="14669" r="9413" b="14827">candidates</wd>

<space/>

<wd l="9571" t="14722" r="9859" b="14870">per</wd>

<space/>

<wd l="10013" t="14669" r="10498" b="14827">token</wd>

<space/>

</ln>

<ln l="6134" t="14918" r="10493" b="15120" baseLine="15072" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="3">

<wd l="6134" t="14918" r="6686" b="15077">makes</wd>

<space/>

<wd l="6773" t="14918" r="7541" b="15120">selecting</wd>

<space/>

<wd l="7618" t="14918" r="7877" b="15077">the</wd>

<space/>

<wd l="7958" t="14942" r="8563" b="15077">correct</wd>

<space/>

<wd l="8635" t="14918" r="9470" b="15077">candidate</wd>

<space/>

<wd l="9542" t="14971" r="9984" b="15077">more</wd>

<space/>

<wd l="10066" t="14918" r="10493" b="15077">chal-</wd>

</ln>

<ln l="6139" t="15173" r="10493" b="15374" baseLine="15326" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="3">

<wd l="6139" t="15173" r="6840" b="15374">lenging.</wd>

<space/>

<wd l="6955" t="15182" r="7267" b="15331">For</wd>

<space/>

<wd l="7373" t="15173" r="8155" b="15374">example,</wd>

<space/>

<wd l="8280" t="15226" r="8568" b="15331">our</wd>

<space/>

<wd l="8674" t="15173" r="9907" b="15331">unconstrained</wd>

<space/>

<wd l="10013" t="15173" r="10493" b="15331">mode</wd>

</ln>

</para>

</column>

</section>

<dd l="5771" t="15746" r="6161" b="15975">

<para l="5804" t="15787" r="6128" b="15946" alignment="left" lsp="exactly" lspExact="223" language="en">

<ln l="5870" t="15787" r="6062" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="950" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="24">

<wd l="5870" t="15792" r="6062" b="15946">91</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4313.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1323" marginTop="1429" marginRight="1283" marginBottom="1302" offsetX="-8" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1323" t="1429" r="10626" b="15339">

<column l="1323" t="1429" r="5917" b="15339">

<para l="1426" t="1474" r="5794" b="4162" alignment="justified" li="72" ri="144" spaceBefore="1" lsp="exactly" lspExact="253" language="en">

<ln l="1435" t="1474" r="5789" b="1675" baseLine="1627" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1435" t="1474" r="2501" b="1675">successfully</wd>

<space/>

<wd l="2592" t="1498" r="3317" b="1675">suggests</wd>

<space/>

<wd l="3408" t="1474" r="4426" b="1675">“Brooklyn”</wd>

<space/>

<wd l="4517" t="1526" r="4685" b="1632">as</wd>

<space/>

<wd l="4776" t="1526" r="4867" b="1632">a</wd>

<space/>

<wd l="4954" t="1474" r="5789" b="1632">candidate</wd>

<space/>

</ln>

<ln l="1430" t="1728" r="5784" b="1930" baseLine="1882" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1430" t="1728" r="1685" b="1886">for</wd>

<space/>

<wd l="1752" t="1728" r="2242" b="1886">token</wd>

<space/>

<wd l="2318" t="1728" r="3163" b="1930">“Brklyn”,</wd>

<space/>

<wd l="3245" t="1728" r="3782" b="1886">which</wd>

<space/>

<wd l="3859" t="1781" r="4152" b="1886">our</wd>

<space/>

<wd l="4224" t="1728" r="5232" b="1886">constrained</wd>

<space/>

<wd l="5304" t="1728" r="5784" b="1886">mode</wd>

<space/>

</ln>

<ln l="1430" t="1982" r="5794" b="2184" baseLine="2136" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1430" t="1982" r="1565" b="2141">is</wd>

<space/>

<wd l="1714" t="1982" r="2549" b="2184">incapable</wd>

<space/>

<wd l="2698" t="1982" r="2918" b="2174">of,</wd>

<space/>

<wd l="3062" t="1982" r="3350" b="2141">but</wd>

<space/>

<wd l="3490" t="1982" r="3749" b="2141">the</wd>

<space/>

<wd l="3898" t="1982" r="4733" b="2141">candidate</wd>

<space/>

<wd l="4882" t="1982" r="5794" b="2141">evaluation</wd>

<space/>

</ln>

<ln l="1430" t="2237" r="5784" b="2438" baseLine="2390" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1430" t="2261" r="2405" b="2438">component</wd>

<space/>

<wd l="2482" t="2237" r="2846" b="2395">fails</wd>

<space/>

<wd l="2928" t="2261" r="3086" b="2395">to</wd>

<space/>

<wd l="3178" t="2237" r="3672" b="2395">select</wd>

<space/>

<wd l="3749" t="2237" r="4762" b="2438">“Brooklyn”</wd>

<space/>

<wd l="4848" t="2290" r="5016" b="2395">as</wd>

<space/>

<wd l="5098" t="2237" r="5357" b="2395">the</wd>

<space/>

<wd l="5443" t="2290" r="5784" b="2395">cor-</wd>

</ln>

<ln l="1426" t="2486" r="5779" b="2688" baseLine="2640" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1426" t="2510" r="1757" b="2645">rect</wd>

<space/>

<wd l="1867" t="2486" r="2702" b="2645">canonical</wd>

<space/>

<wd l="2818" t="2486" r="3283" b="2645">form.</wd>

<space/>

<wd l="3403" t="2491" r="3557" b="2640">A</wd>

<space/>

<wd l="3667" t="2486" r="4440" b="2688">potential</wd>

<space/>

<wd l="4560" t="2486" r="5256" b="2645">solution</wd>

<space/>

<wd l="5371" t="2486" r="5506" b="2645">is</wd>

<space/>

<wd l="5621" t="2510" r="5779" b="2645">to</wd>

<space/>

</ln>

<ln l="1430" t="2741" r="5784" b="2899" baseLine="2894" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1430" t="2741" r="2069" b="2899">include</wd>

<space/>

<wd l="2170" t="2794" r="2616" b="2899">more</wd>

<space/>

<wd l="2722" t="2765" r="3365" b="2899">context</wd>

<space/>

<wd l="3466" t="2741" r="4502" b="2899">information</wd>

<space/>

<wd l="4598" t="2741" r="4858" b="2899">for</wd>

<space/>

<wd l="4954" t="2741" r="5784" b="2899">candidate</wd>

<space/>

</ln>

<ln l="1430" t="2995" r="5784" b="3197" baseLine="3149" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1430" t="2995" r="2386" b="3154">evaluation.</wd>

<space/>

<wd l="2467" t="3005" r="2779" b="3154">For</wd>

<space/>

<wd l="2851" t="2995" r="3634" b="3197">example,</wd>

<space/>

<wd l="3715" t="3019" r="4046" b="3154">text</wd>

<space/>

<wd l="4123" t="2995" r="5011" b="3154">likelihood</wd>

<space/>

<wd l="5088" t="2995" r="5784" b="3154">estimat-</wd>

</ln>

<ln l="1430" t="3245" r="5784" b="3446" baseLine="3398" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1430" t="3245" r="1637" b="3403">ed</wd>

<space/>

<wd l="1709" t="3245" r="1934" b="3446">by</wd>

<space/>

<wd l="2016" t="3298" r="2107" b="3403">a</wd>

<space/>

<wd l="2194" t="3250" r="2602" b="3403">CRF</wd>

<space/>

<wd l="2688" t="3245" r="3230" b="3403">model</wd>

<space/>

<wd l="3312" t="3245" r="3869" b="3403">before</wd>

<space/>

<wd l="3960" t="3245" r="4277" b="3403">and</wd>

<space/>

<wd l="4358" t="3245" r="4762" b="3403">after</wd>

<space/>

<wd l="4838" t="3245" r="5784" b="3403">normaliza-</wd>

</ln>

<ln l="1426" t="3499" r="5784" b="3658" baseLine="3653" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1426" t="3499" r="1771" b="3658">tion</wd>

<space/>

<wd l="1838" t="3552" r="2131" b="3658">can</wd>

<space/>

<wd l="2198" t="3499" r="2405" b="3658">be</wd>

<space/>

<wd l="2477" t="3499" r="3000" b="3658">added</wd>

<space/>

<wd l="3067" t="3552" r="3235" b="3658">as</wd>

<space/>

<wd l="3317" t="3499" r="4474" b="3658">classification</wd>

<space/>

<wd l="4541" t="3499" r="5275" b="3658">features.</wd>

<space/>

<wd l="5352" t="3509" r="5784" b="3658">Hav-</wd>

</ln>

<ln l="1430" t="3754" r="5784" b="3955" baseLine="3907" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1430" t="3754" r="1704" b="3955">ing</wd>

<space/>

<wd l="1766" t="3754" r="2218" b="3912">word</wd>

<space/>

<wd l="2285" t="3754" r="3158" b="3955">frequency</wd>

<space/>

<wd l="3226" t="3806" r="3394" b="3912">as</wd>

<space/>

<wd l="3466" t="3806" r="3557" b="3912">a</wd>

<space/>

<wd l="3624" t="3754" r="4224" b="3912">feature</wd>

<space/>

<wd l="4296" t="3806" r="4598" b="3912">can</wd>

<space/>

<wd l="4661" t="3754" r="5002" b="3912">also</wd>

<space/>

<wd l="5064" t="3754" r="5270" b="3912">be</wd>

<space/>

<wd l="5338" t="3754" r="5784" b="3955">help-</wd>

</ln>

<ln l="1430" t="4003" r="1714" b="4162" baseLine="4157" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1430" t="4003" r="1714" b="4162">ful.</wd>

</ln>

</para>

<para l="1426" t="4258" r="5784" b="5218" alignment="justified" li="72" ri="144" spaceBefore="1" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="1656" t="4258" r="5784" b="4459" baseLine="4411" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="4258" r="1987" b="4416">The</wd>

<space/>

<wd l="2126" t="4258" r="2693" b="4459">binary</wd>

<space/>

<wd l="2832" t="4258" r="3250" b="4416">class</wd>

<space/>

<wd l="3398" t="4258" r="4099" b="4459">labeling</wd>

<space/>

<wd l="4243" t="4258" r="4406" b="4411">in</wd>

<space/>

<wd l="4546" t="4258" r="4805" b="4416">the</wd>

<space/>

<wd l="4954" t="4258" r="5784" b="4416">candidate</wd>

<space/>

</ln>

<ln l="1430" t="4512" r="5784" b="4714" baseLine="4666" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="4512" r="2342" b="4670">evaluation</wd>

<space/>

<wd l="2525" t="4536" r="3499" b="4714">component</wd>

<space/>

<wd l="3682" t="4512" r="4070" b="4670">does</wd>

<space/>

<wd l="4258" t="4536" r="4541" b="4670">not</wd>

<space/>

<wd l="4723" t="4512" r="5784" b="4670">differentiate</wd>

<space/>

</ln>

<ln l="1426" t="4762" r="5774" b="4968" baseLine="4920">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1426" t="4766" r="2650" b="4925">normalization</wd>

<space/>

<wd l="2803" t="4766" r="3480" b="4925">without</wd>

<space/>

<wd l="3638" t="4766" r="4248" b="4968">change</wd>

<space/>

<wd l="4416" t="4766" r="4790" b="4968">(e.g.</wd>

<space/>

<wd l="4963" t="4771" r="5414" b="4925">“car”</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><wd l="5592" t="4762" r="5774" b="4920">→</wd>

<space/>

</run>

</ln>

<ln l="1430" t="5016" r="5784" b="5218" baseLine="5170" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="5016" r="1958" b="5218">“car”)</wd>

<space/>

<wd l="2035" t="5016" r="2453" b="5174">from</wd>

<space/>

<wd l="2525" t="5016" r="3744" b="5174">normalization</wd>

<space/>

<wd l="3811" t="5016" r="4200" b="5174">with</wd>

<space/>

<wd l="4272" t="5016" r="4882" b="5218">change</wd>

<space/>

<wd l="4963" t="5016" r="5333" b="5218">(e.g.</wd>

<space/>

<wd l="5419" t="5021" r="5784" b="5174">“ur”</wd>

</ln>

</para>

<para l="1421" t="5266" r="5813" b="8760" alignment="justified" li="72" ri="144" lsp="exactly" lspExact="253" language="en">

<ln l="1440" t="5266" r="5784" b="5472" baseLine="5424">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><wd l="1440" t="5266" r="1622" b="5424">→</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1742" t="5270" r="2453" b="5472">“your”).</wd>

<space/>

<wd l="2563" t="5275" r="2798" b="5429">As</wd>

<space/>

<wd l="2909" t="5323" r="3000" b="5429">a</wd>

<space/>

<wd l="3101" t="5270" r="3634" b="5462">result,</wd>

<space/>

<wd l="3739" t="5323" r="3994" b="5429">we</wd>

<space/>

<wd l="4099" t="5323" r="4358" b="5429">are</wd>

<space/>

<wd l="4459" t="5270" r="5040" b="5429">unable</wd>

<space/>

<wd l="5146" t="5294" r="5309" b="5429">to</wd>

<space/>

<wd l="5414" t="5294" r="5784" b="5429">tune</wd>

<space/>

</run>

</ln>

<ln l="1421" t="5525" r="5794" b="5726" baseLine="5678" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="5549" r="2381" b="5726">parameters</wd>

<space/>

<wd l="2453" t="5549" r="2611" b="5683">to</wd>

<space/>

<wd l="2688" t="5525" r="3149" b="5683">favor</wd>

<space/>

<wd l="3206" t="5525" r="4430" b="5683">normalization</wd>

<space/>

<wd l="4488" t="5525" r="4882" b="5683">with</wd>

<space/>

<wd l="4944" t="5525" r="5554" b="5726">change</wd>

<space/>

<wd l="5626" t="5525" r="5794" b="5678">in</wd>

<space/>

</ln>

<ln l="1430" t="5774" r="5784" b="5976" baseLine="5928" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="5774" r="1896" b="5933">order</wd>

<space/>

<wd l="1954" t="5798" r="2117" b="5933">to</wd>

<space/>

<wd l="2194" t="5774" r="2856" b="5933">achieve</wd>

<space/>

<wd l="2928" t="5827" r="3019" b="5933">a</wd>

<space/>

<wd l="3082" t="5774" r="3590" b="5933">better</wd>

<space/>

<wd l="3648" t="5774" r="4440" b="5933">trade-off</wd>

<space/>

<wd l="4478" t="5774" r="5218" b="5933">between</wd>

<space/>

<wd l="5275" t="5774" r="5784" b="5976">preci-</wd>

</ln>

<ln l="1435" t="6029" r="5794" b="6230" baseLine="6182" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1435" t="6029" r="1795" b="6187">sion</wd>

<space/>

<wd l="1882" t="6029" r="2194" b="6187">and</wd>

<space/>

<wd l="2280" t="6029" r="2765" b="6187">recall</wd>

<space/>

<wd l="2856" t="6029" r="3490" b="6230">(higher</wd>

<space/>

<wd l="3571" t="6029" r="4056" b="6187">recall</wd>

<space/>

<wd l="4147" t="6029" r="4464" b="6187">and</wd>

<space/>

<wd l="4555" t="6029" r="5208" b="6230">slightly</wd>

<space/>

<wd l="5294" t="6029" r="5794" b="6187">lower</wd>

<space/>

</ln>

<ln l="1421" t="6283" r="5784" b="6485" baseLine="6437" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="6283" r="2347" b="6485">precision),</wd>

<space/>

<wd l="2443" t="6283" r="2981" b="6442">which</wd>

<space/>

<wd l="3062" t="6336" r="3614" b="6442">means</wd>

<space/>

<wd l="3706" t="6283" r="4267" b="6485">higher</wd>

<space/>

<wd l="4349" t="6288" r="4550" b="6437">F1</wd>

<space/>

<wd l="4670" t="6336" r="5170" b="6442">score.</wd>

<space/>

<wd l="5261" t="6288" r="5414" b="6437">A</wd>

<space/>

<wd l="5491" t="6336" r="5784" b="6485">po-</wd>

</ln>

<ln l="1426" t="6533" r="5784" b="6734" baseLine="6686" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="6533" r="1973" b="6691">tential</wd>

<space/>

<wd l="2045" t="6533" r="2746" b="6691">solution</wd>

<space/>

<wd l="2808" t="6533" r="2938" b="6691">is</wd>

<space/>

<wd l="3005" t="6557" r="3168" b="6691">to</wd>

<space/>

<wd l="3240" t="6533" r="3850" b="6734">change</wd>

<space/>

<wd l="3917" t="6533" r="4176" b="6691">the</wd>

<space/>

<wd l="4248" t="6533" r="5078" b="6691">candidate</wd>

<space/>

<wd l="5150" t="6533" r="5784" b="6691">evalua-</wd>

</ln>

<ln l="1426" t="6787" r="5779" b="6989" baseLine="6941" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="6787" r="1771" b="6946">tion</wd>

<space/>

<wd l="1886" t="6811" r="2861" b="6989">component</wd>

<space/>

<wd l="2976" t="6787" r="3302" b="6946">into</wd>

<space/>

<wd l="3427" t="6840" r="3518" b="6946">a</wd>

<space/>

<wd l="3634" t="6787" r="4459" b="6946">two-level</wd>

<space/>

<wd l="4579" t="6787" r="5779" b="6946">classification.</wd>

<space/>

</ln>

<ln l="1430" t="7042" r="5784" b="7200" baseLine="7195" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="7042" r="1762" b="7200">The</wd>

<space/>

<wd l="1858" t="7042" r="2208" b="7200">first</wd>

<space/>

<wd l="2294" t="7042" r="2717" b="7200">level</wd>

<space/>

<wd l="2808" t="7042" r="3600" b="7200">classifies</wd>

<space/>

<wd l="3691" t="7042" r="4406" b="7200">whether</wd>

<space/>

<wd l="4488" t="7042" r="4747" b="7200">the</wd>

<space/>

<wd l="4838" t="7042" r="5784" b="7200">normaliza-</wd>

</ln>

<ln l="1426" t="7296" r="5813" b="7498" baseLine="7450" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="7296" r="1771" b="7454">tion</wd>

<space/>

<wd l="1829" t="7296" r="2318" b="7454">needs</wd>

<space/>

<wd l="2390" t="7349" r="2702" b="7498">any</wd>

<space/>

<wd l="2770" t="7296" r="3427" b="7498">change.</wd>

<space/>

<wd l="3504" t="7296" r="3667" b="7450">If</wd>

<space/>

<wd l="3706" t="7349" r="3970" b="7488">no,</wd>

<space/>

<wd l="4037" t="7296" r="4416" b="7454">then</wd>

<space/>

<wd l="4478" t="7296" r="4738" b="7454">the</wd>

<space/>

<wd l="4805" t="7296" r="5294" b="7454">token</wd>

<space/>

<wd l="5357" t="7296" r="5813" b="7454">itself</wd>

<space/>

</ln>

<ln l="1430" t="7546" r="5789" b="7747" baseLine="7699" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="7546" r="1565" b="7704">is</wd>

<space/>

<wd l="1646" t="7570" r="2208" b="7747">output</wd>

<space/>

<wd l="2280" t="7598" r="2453" b="7704">as</wd>

<space/>

<wd l="2530" t="7546" r="2794" b="7704">the</wd>

<space/>

<wd l="2870" t="7546" r="4090" b="7704">normalization</wd>

<space/>

<wd l="4162" t="7546" r="4694" b="7704">result.</wd>

<space/>

<wd l="4781" t="7546" r="4944" b="7699">If</wd>

<space/>

<wd l="4992" t="7598" r="5328" b="7747">yes,</wd>

<space/>

<wd l="5414" t="7546" r="5789" b="7704">then</wd>

<space/>

</ln>

<ln l="1426" t="7800" r="5784" b="8002" baseLine="7954" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="7800" r="1690" b="7958">the</wd>

<space/>

<wd l="1810" t="7800" r="2414" b="7958">second</wd>

<space/>

<wd l="2525" t="7800" r="2942" b="7958">level</wd>

<space/>

<wd l="3058" t="7800" r="4214" b="7958">classification</wd>

<space/>

<wd l="4325" t="7800" r="4944" b="8002">assigns</wd>

<space/>

<wd l="5064" t="7853" r="5155" b="7958">a</wd>

<space/>

<wd l="5270" t="7800" r="5784" b="7958">confi-</wd>

</ln>

<ln l="1430" t="8054" r="5794" b="8213" baseLine="8208" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="8054" r="1934" b="8213">dence</wd>

<space/>

<wd l="2054" t="8107" r="2501" b="8213">score</wd>

<space/>

<wd l="2616" t="8078" r="2774" b="8213">to</wd>

<space/>

<wd l="2894" t="8054" r="3293" b="8213">each</wd>

<space/>

<wd l="3403" t="8054" r="4234" b="8213">candidate</wd>

<space/>

<wd l="4349" t="8054" r="4680" b="8213">that</wd>

<space/>

<wd l="4786" t="8054" r="4920" b="8213">is</wd>

<space/>

<wd l="5040" t="8054" r="5794" b="8213">different</wd>

<space/>

</ln>

<ln l="1430" t="8304" r="5784" b="8506" baseLine="8458" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="8304" r="1853" b="8462">from</wd>

<space/>

<wd l="1973" t="8304" r="2237" b="8462">the</wd>

<space/>

<wd l="2366" t="8304" r="2856" b="8462">token</wd>

<space/>

<wd l="2981" t="8304" r="3293" b="8462">and</wd>

<space/>

<wd l="3418" t="8328" r="4051" b="8506">outputs</wd>

<space/>

<wd l="4181" t="8304" r="4445" b="8462">the</wd>

<space/>

<wd l="4574" t="8357" r="4882" b="8462">one</wd>

<space/>

<wd l="5011" t="8304" r="5400" b="8462">with</wd>

<space/>

<wd l="5525" t="8304" r="5784" b="8462">the</wd>

<space/>

</ln>

<ln l="1426" t="8558" r="3730" b="8760" baseLine="8712" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="8558" r="2064" b="8760">highest</wd>

<space/>

<wd l="2126" t="8611" r="2573" b="8717">score</wd>

<space/>

<wd l="2640" t="8611" r="2813" b="8717">as</wd>

<space/>

<wd l="2875" t="8558" r="3139" b="8717">the</wd>

<space/>

<wd l="3197" t="8558" r="3730" b="8717">result.</wd>

</ln>

</para>

<para l="1430" t="9024" r="4978" b="9192" alignment="left" li="72" spaceBefore="208" lsp="exactly" lspExact="273" language="en">

<ln l="1430" t="9024" r="4978" b="9192" baseLine="9187" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="8">

<wd l="1430" t="9024" r="1541" b="9192">5</wd>

<space/>

<wd l="1867" t="9024" r="3091" b="9192">Conclusions</wd>

<space/>

<wd l="3168" t="9024" r="3542" b="9192">and</wd>

<space/>

<wd l="3610" t="9024" r="4306" b="9192">Future</wd>

<space/>

<wd l="4373" t="9024" r="4978" b="9192">Work</wd>

</ln>

</para>

<para l="1426" t="9446" r="5794" b="14160" alignment="justified" li="72" ri="144" spaceBefore="155" lsp="exactly" lspExact="253" language="en">

<ln l="1430" t="9446" r="5789" b="9648" baseLine="9600" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="9456" r="1608" b="9600">In</wd>

<space/>

<wd l="1714" t="9446" r="2026" b="9605">this</wd>

<space/>

<wd l="2131" t="9499" r="2669" b="9648">paper,</wd>

<space/>

<wd l="2784" t="9499" r="3038" b="9605">we</wd>

<space/>

<wd l="3144" t="9470" r="3782" b="9648">present</wd>

<space/>

<wd l="3893" t="9499" r="3984" b="9605">a</wd>

<space/>

<wd l="4099" t="9470" r="4699" b="9648">system</wd>

<space/>

<wd l="4805" t="9470" r="4968" b="9605">to</wd>

<space/>

<wd l="5078" t="9446" r="5789" b="9648">perform</wd>

<space/>

</ln>

<ln l="1430" t="9701" r="5779" b="9902" baseLine="9854" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="9701" r="2011" b="9859">lexical</wd>

<space/>

<wd l="2131" t="9701" r="3355" b="9859">normalization</wd>

<space/>

<wd l="3475" t="9701" r="3734" b="9859">for</wd>

<space/>

<wd l="3850" t="9701" r="4522" b="9902">English</wd>

<space/>

<wd l="4646" t="9701" r="5290" b="9859">Twitter</wd>

<space/>

<wd l="5405" t="9725" r="5779" b="9893">text,</wd>

<space/>

</ln>

<ln l="1426" t="9955" r="5794" b="10114" baseLine="10109" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="9955" r="1819" b="10114">with</wd>

<space/>

<wd l="1925" t="10008" r="2016" b="10114">a</wd>

<space/>

<wd l="2126" t="9955" r="3134" b="10114">constrained</wd>

<space/>

<wd l="3235" t="9955" r="3720" b="10114">mode</wd>

<space/>

<wd l="3830" t="9955" r="4147" b="10114">and</wd>

<space/>

<wd l="4253" t="10008" r="4450" b="10114">an</wd>

<space/>

<wd l="4555" t="9955" r="5794" b="10114">unconstrained</wd>

<space/>

</ln>

<ln l="1426" t="10205" r="5765" b="10406" baseLine="10358" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="10205" r="1958" b="10363">mode.</wd>

<space/>

<wd l="2035" t="10210" r="2376" b="10363">Our</wd>

<space/>

<wd l="2443" t="10205" r="3451" b="10363">constrained</wd>

<space/>

<wd l="3514" t="10205" r="3994" b="10363">mode</wd>

<space/>

<wd l="4070" t="10205" r="4814" b="10363">achieves</wd>

<space/>

<wd l="4886" t="10205" r="5146" b="10363">the</wd>

<space/>

<wd l="5218" t="10229" r="5486" b="10406">top</wd>

<space/>

<wd l="5558" t="10210" r="5765" b="10358">F1</wd>

<space/>

</ln>

<ln l="1435" t="10459" r="5794" b="10661" baseLine="10613" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1435" t="10512" r="1886" b="10618">score</wd>

<space/>

<wd l="2011" t="10459" r="2179" b="10613">in</wd>

<space/>

<wd l="2294" t="10459" r="2558" b="10618">the</wd>

<space/>

<wd l="2678" t="10469" r="3408" b="10618">W-NUT</wd>

<space/>

<wd l="3528" t="10459" r="4008" b="10661">noisy</wd>

<space/>

<wd l="4123" t="10483" r="4454" b="10618">text</wd>

<space/>

<wd l="4570" t="10459" r="5794" b="10618">normalization</wd>

<space/>

</ln>

<ln l="1430" t="10714" r="5770" b="10915" baseLine="10867" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="10714" r="2477" b="10915">competition</wd>

<space/>

<wd l="2573" t="10714" r="2890" b="10872">and</wd>

<space/>

<wd l="2986" t="10714" r="4046" b="10915">outperforms</wd>

<space/>

<wd l="4152" t="10714" r="4603" b="10872">other</wd>

<space/>

<wd l="4685" t="10714" r="5770" b="10915">participants’</wd>

<space/>

</ln>

<ln l="1426" t="10968" r="5784" b="11126" baseLine="11122" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="10968" r="2659" b="11126">unconstrained</wd>

<space/>

<wd l="2770" t="10968" r="3389" b="11126">modes.</wd>

<space/>

<wd l="3514" t="10973" r="3854" b="11126">Our</wd>

<space/>

<wd l="3960" t="10968" r="5194" b="11126">unconstrained</wd>

<space/>

<wd l="5304" t="10968" r="5784" b="11126">mode</wd>

<space/>

</ln>

<ln l="1430" t="11218" r="5784" b="11419" baseLine="11371" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="11218" r="2222" b="11419">currently</wd>

<space/>

<wd l="2309" t="11218" r="2592" b="11376">has</wd>

<space/>

<wd l="2698" t="11218" r="3350" b="11419">slightly</wd>

<space/>

<wd l="3442" t="11218" r="3941" b="11376">lower</wd>

<space/>

<wd l="4027" t="11218" r="4512" b="11376">recall</wd>

<space/>

<wd l="4608" t="11218" r="4920" b="11376">and</wd>

<space/>

<wd l="5006" t="11222" r="5218" b="11371">F1</wd>

<space/>

<wd l="5338" t="11270" r="5784" b="11376">score</wd>

<space/>

</ln>

<ln l="1426" t="11472" r="5784" b="11664" baseLine="11626" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="11472" r="1800" b="11630">than</wd>

<space/>

<wd l="1882" t="11472" r="2141" b="11630">the</wd>

<space/>

<wd l="2232" t="11472" r="3240" b="11630">constrained</wd>

<space/>

<wd l="3317" t="11472" r="3845" b="11664">mode,</wd>

<space/>

<wd l="3931" t="11472" r="4219" b="11630">but</wd>

<space/>

<wd l="4296" t="11472" r="4416" b="11630">it</wd>

<space/>

<wd l="4488" t="11472" r="4776" b="11630">has</wd>

<space/>

<wd l="4862" t="11525" r="4954" b="11630">a</wd>

<space/>

<wd l="5035" t="11472" r="5266" b="11630">lot</wd>

<space/>

<wd l="5338" t="11525" r="5784" b="11630">more</wd>

<space/>

</ln>

<ln l="1426" t="11726" r="5784" b="11928" baseLine="11880" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="11779" r="1886" b="11885">room</wd>

<space/>

<wd l="1958" t="11726" r="2218" b="11885">for</wd>

<space/>

<wd l="2280" t="11726" r="3451" b="11928">improvement</wd>

<space/>

<wd l="3518" t="11779" r="3691" b="11885">as</wd>

<space/>

<wd l="3768" t="11726" r="4608" b="11885">discussed</wd>

<space/>

<wd l="4675" t="11726" r="4843" b="11880">in</wd>

<space/>

<wd l="4906" t="11726" r="5170" b="11885">the</wd>

<space/>

<wd l="5246" t="11726" r="5784" b="11885">evalu-</wd>

</ln>

<ln l="1430" t="11976" r="5784" b="12178" baseLine="12130" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="11976" r="1867" b="12134">ation</wd>

<space/>

<wd l="1973" t="11976" r="2626" b="12134">section.</wd>

<space/>

<wd l="2731" t="11986" r="3302" b="12134">Future</wd>

<space/>

<wd l="3403" t="11976" r="3854" b="12134">work</wd>

<space/>

<wd l="3955" t="11976" r="4675" b="12134">includes</wd>

<space/>

<wd l="4781" t="11976" r="5784" b="12178">implement-</wd>

</ln>

<ln l="1430" t="12230" r="5784" b="12432" baseLine="12384" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="12230" r="1704" b="12432">ing</wd>

<space/>

<wd l="1776" t="12230" r="2035" b="12389">the</wd>

<space/>

<wd l="2112" t="12230" r="2554" b="12389">ideas</wd>

<space/>

<wd l="2630" t="12254" r="2789" b="12389">to</wd>

<space/>

<wd l="2870" t="12230" r="3590" b="12432">improve</wd>

<space/>

<wd l="3667" t="12230" r="3926" b="12389">the</wd>

<space/>

<wd l="4003" t="12230" r="5237" b="12389">unconstrained</wd>

<space/>

<wd l="5304" t="12230" r="5784" b="12389">mode</wd>

<space/>

</ln>

<ln l="1430" t="12485" r="5794" b="12686" baseLine="12638" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="12485" r="1742" b="12643">and</wd>

<space/>

<wd l="1824" t="12485" r="2654" b="12686">exploring</wd>

<space/>

<wd l="2746" t="12485" r="4166" b="12686">semi-supervised</wd>

<space/>

<wd l="4243" t="12485" r="4555" b="12643">and</wd>

<space/>

<wd l="4632" t="12485" r="5794" b="12686">unsupervised</wd>

<space/>

</ln>

<ln l="1426" t="12734" r="5794" b="12936" baseLine="12888" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="12758" r="1757" b="12893">text</wd>

<space/>

<wd l="1886" t="12734" r="3154" b="12893">normalization.</wd>

<space/>

<wd l="3302" t="12739" r="3658" b="12893">One</wd>

<space/>

<wd l="3787" t="12734" r="4560" b="12936">potential</wd>

<space/>

<wd l="4704" t="12734" r="5405" b="12893">solution</wd>

<space/>

<wd l="5539" t="12734" r="5794" b="12893">for</wd>

<space/>

</ln>

<ln l="1426" t="12989" r="5789" b="13190" baseLine="13142" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="12989" r="2587" b="13190">unsupervised</wd>

<space/>

<wd l="2645" t="13013" r="2976" b="13147">text</wd>

<space/>

<wd l="3029" t="12989" r="4253" b="13147">normalization</wd>

<space/>

<wd l="4315" t="12989" r="4450" b="13147">is</wd>

<space/>

<wd l="4517" t="12989" r="4867" b="13147">first</wd>

<space/>

<wd l="4930" t="12989" r="5789" b="13190">clustering</wd>

<space/>

</ln>

<ln l="1426" t="13243" r="5789" b="13445" baseLine="13397" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="13243" r="1992" b="13402">tokens</wd>

<space/>

<wd l="2088" t="13243" r="2597" b="13402">based</wd>

<space/>

<wd l="2693" t="13296" r="2909" b="13402">on</wd>

<space/>

<wd l="3010" t="13267" r="3653" b="13402">context</wd>

<space/>

<wd l="3754" t="13243" r="4128" b="13445">(e.g.</wd>

<space/>

<wd l="4234" t="13253" r="4829" b="13402">Brown</wd>

<space/>

<wd l="4930" t="13243" r="5789" b="13445">clustering</wd>

<space/>

</ln>

<ln l="1430" t="13498" r="5794" b="13699" baseLine="13651" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="13498" r="2098" b="13699">(Brown</wd>

<space/>

<wd l="2165" t="13522" r="2318" b="13656">et</wd>

<space/>

<wd l="2386" t="13498" r="2635" b="13690">al.,</wd>

<space/>

<wd l="2736" t="13498" r="3293" b="13699">1992))</wd>

<space/>

<wd l="3365" t="13498" r="3677" b="13656">and</wd>

<space/>

<wd l="3739" t="13498" r="4118" b="13656">then</wd>

<space/>

<wd l="4181" t="13498" r="4968" b="13699">choosing</wd>

<space/>

<wd l="5035" t="13498" r="5294" b="13656">the</wd>

<space/>

<wd l="5362" t="13522" r="5794" b="13656">most</wd>

<space/>

</ln>

<ln l="1430" t="13747" r="5789" b="13949" baseLine="13901" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="13747" r="2160" b="13949">frequent</wd>

<space/>

<wd l="2256" t="13747" r="2746" b="13906">token</wd>

<space/>

<wd l="2851" t="13747" r="3014" b="13901">in</wd>

<space/>

<wd l="3120" t="13747" r="3518" b="13906">each</wd>

<space/>

<wd l="3619" t="13747" r="4205" b="13906">cluster</wd>

<space/>

<wd l="4306" t="13800" r="4474" b="13906">as</wd>

<space/>

<wd l="4584" t="13747" r="4843" b="13906">the</wd>

<space/>

<wd l="4954" t="13747" r="5789" b="13906">canonical</wd>

<space/>

</ln>

<ln l="1430" t="14002" r="4363" b="14160" baseLine="14155" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="14002" r="1853" b="14160">form</wd>

<space/>

<wd l="1915" t="14002" r="2170" b="14160">for</wd>

<space/>

<wd l="2227" t="14002" r="2438" b="14160">all</wd>

<space/>

<wd l="2496" t="14002" r="3062" b="14160">tokens</wd>

<space/>

<wd l="3130" t="14002" r="3298" b="14155">in</wd>

<space/>

<wd l="3350" t="14002" r="3682" b="14160">that</wd>

<space/>

<wd l="3739" t="14002" r="4363" b="14160">cluster.</wd>

</ln>

</para>

<para l="1426" t="14506" r="2448" b="14674" alignment="left" li="72" spaceBefore="247" lsp="exactly" lspExact="273" language="en">

<ln l="1426" t="14506" r="2448" b="14674" baseLine="14669" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="14506" r="2448" b="14674">Reference</wd>

</ln>

</para>

<para l="1430" t="14885" r="5789" b="15298" alignment="justified" li="288" ri="144" spaceBefore="114" spaceAfter="31" fli="-216" lsp="exactly" lspExact="230" language="en">

<ln l="1430" t="14885" r="5789" b="15058" baseLine="15024" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="14890" r="1589" b="15029">T.</wd>

<space/>

<wd l="1661" t="14885" r="2381" b="15058">Baldwin,</wd>

<space/>

<wd l="2448" t="14890" r="2664" b="15029">M.</wd>

<space/>

<wd l="2746" t="14885" r="3557" b="15058">Catherine,</wd>

<space/>

<wd l="3629" t="14890" r="3797" b="15029">B.</wd>

<space/>

<wd l="3874" t="14890" r="4248" b="15058">Han,</wd>

<space/>

<wd l="4315" t="14890" r="4680" b="15029">Y.B.</wd>

<space/>

<wd l="4757" t="14885" r="5155" b="15058">Kim,</wd>

<space/>

<wd l="5222" t="14890" r="5405" b="15029">A.</wd>

<space/>

<wd l="5477" t="14885" r="5789" b="15029">Rit-</wd>

</ln>

<ln l="1651" t="15115" r="5789" b="15298" baseLine="15254">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1651" t="15134" r="1867" b="15259">ter</wd>

<space/>

<wd l="1968" t="15115" r="2251" b="15259">and</wd>

<space/>

<wd l="2347" t="15120" r="2578" b="15259">W.</wd>

<space/>

<wd l="2683" t="15120" r="2966" b="15259">Xu.</wd>

<space/>

<wd l="3077" t="15120" r="3514" b="15259">2015.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3624" t="15115" r="4200" b="15259">Shared</wd>

<space/>

<wd l="4286" t="15115" r="4728" b="15259">Tasks</wd>

<space/>

<wd l="4824" t="15115" r="5016" b="15298">of</wd>

<space/>

<wd l="5064" t="15115" r="5304" b="15259">the</wd>

<space/>

<wd l="5390" t="15120" r="5789" b="15259">2015</wd>

</run>

</ln>

</para>

</column>

<column l="6032" t="1429" r="10626" b="8513">

<para l="6350" t="1469" r="10502" b="2112" alignment="justified" li="288" ri="72" spaceBefore="1" lsp="exactly" lspExact="230" language="en">

<ln l="6384" t="1469" r="10502" b="1651" baseLine="1608" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6384" t="1469" r="7171" b="1651">Workshop</wd>

<space/>

<wd l="7262" t="1517" r="7450" b="1613">on</wd>

<space/>

<wd l="7536" t="1474" r="8002" b="1651">Noisy</wd>

<space/>

<wd l="8107" t="1469" r="9350" b="1651">User-generated</wd>

<space/>

<wd l="9437" t="1474" r="9821" b="1613">Text:</wd>

<space/>

<wd l="9931" t="1474" r="10502" b="1613">Twitter</wd>

<space/>

</ln>

<ln l="6350" t="1699" r="10498" b="1882" baseLine="1838" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="1699" r="6960" b="1843">Lexical</wd>

<space/>

<wd l="7008" t="1699" r="8165" b="1843">Normalization</wd>

<space/>

<wd l="8237" t="1699" r="8549" b="1843">and</wd>

<space/>

<wd l="8592" t="1699" r="9178" b="1843">Named</wd>

<space/>

<wd l="9230" t="1704" r="9715" b="1882">Entity</wd>

<space/>

<wd l="9773" t="1704" r="10498" b="1882">Recogni-</wd>

</ln>

<ln l="6365" t="1930" r="8280" b="2112" baseLine="2069">

<wd l="6365" t="1939" r="6710" b="2074"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">tion</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6763" t="1934" r="6931" b="2074">In</wd>

<space/>

<wd l="6984" t="1934" r="7406" b="2074">Proc.</wd>

<space/>

<wd l="7483" t="1930" r="7675" b="2112">of</wd>

<space/>

</run>

<wd l="7709" t="1934" r="8280" b="2074"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">WNUT</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

<para l="6134" t="2280" r="10498" b="2693" alignment="justified" li="288" ri="72" spaceBefore="116" fli="-216" lsp="exactly" lspExact="230" language="en">

<bullet type="ordered" numChars="3">

</bullet>

<ln l="6134" t="2280" r="10498" b="2424" baseLine="2419">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6134" t="2285" r="6384" b="2419">L.</wd>

<space/>

<wd l="6384" t="2280" r="7114" b="2424">Breiman.</wd>

<space/>

<wd l="7200" t="2285" r="7642" b="2424">2001.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7723" t="2280" r="8386" b="2424">Random</wd>

<space/>

</run>

<wd l="8458" t="2285" r="9101" b="2424"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Forests</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="9192" t="2280" r="9888" b="2424">Machine</wd>

<space/>

<wd l="9965" t="2285" r="10498" b="2424">Learn-</wd>

</run>

</ln>

<ln l="6365" t="2510" r="7656" b="2693" baseLine="2650" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6365" t="2510" r="6662" b="2693">ing,</wd>

<space/>

<wd l="6720" t="2510" r="7195" b="2693">45(1),</wd>

<space/>

<wd l="7258" t="2515" r="7656" b="2654">5-32.</wd>

</ln>

</para>

<para l="6134" t="2861" r="10507" b="3696" alignment="justified" li="288" ri="72" spaceBefore="121" fli="-216" lsp="exactly" lspExact="230" language="en">

<ln l="6134" t="2861" r="10488" b="3034" baseLine="3000" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="2866" r="6283" b="3005">P.</wd>

<space/>

<wd l="6370" t="2866" r="6955" b="3034">Brown,</wd>

<space/>

<wd l="7032" t="2866" r="7181" b="3005">P.</wd>

<space/>

<wd l="7272" t="2861" r="7987" b="3034">deSouza,</wd>

<space/>

<wd l="8064" t="2866" r="8237" b="3005">R.</wd>

<space/>

<wd l="8318" t="2866" r="8938" b="3034">Mercer,</wd>

<space/>

<wd l="9014" t="2866" r="9197" b="3005">V.</wd>

<space/>

<wd l="9283" t="2861" r="9715" b="3005">Della</wd>

<space/>

<wd l="9782" t="2861" r="10296" b="3034">Pietra,</wd>

<space/>

<wd l="10373" t="2866" r="10488" b="3005">J.</wd>

<space/>

</ln>

<ln l="6360" t="3091" r="10507" b="3274" baseLine="3230">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6360" t="3091" r="6667" b="3235">Lai.</wd>

<space/>

<wd l="6773" t="3096" r="7195" b="3235">1992.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7291" t="3091" r="8294" b="3235">Class-Based</wd>

<space/>

<wd l="8347" t="3139" r="8928" b="3274">n-gram</wd>

<space/>

<wd l="8990" t="3091" r="9586" b="3235">Models</wd>

<space/>

<wd l="9662" t="3091" r="9854" b="3274">of</wd>

<space/>

<wd l="9869" t="3091" r="10507" b="3235">Natural</wd>

<space/>

</run>

</ln>

<ln l="6350" t="3322" r="10488" b="3504" baseLine="3461">

<wd l="6350" t="3326" r="7200" b="3504"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Language</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7296" t="3322" r="8491" b="3504">Computational</wd>

<space/>

</run>

<wd l="8539" t="3326" r="9470" b="3504"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Linguistics</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="9547" t="3322" r="9840" b="3466">vol.</wd>

<space/>

<wd l="9946" t="3326" r="10171" b="3494">18,</wd>

<space/>

<wd l="10248" t="3370" r="10488" b="3504">pp.</wd>

<space/>

</run>

</ln>

<ln l="6360" t="3557" r="7099" b="3696" baseLine="3691" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6360" t="3557" r="7099" b="3696">467–479.</wd>

</ln>

</para>

<para l="6134" t="3902" r="10498" b="5002" alignment="justified" li="288" ri="72" spaceBefore="122" fli="-216" lsp="exactly" lspExact="230" language="en">

<ln l="6134" t="3902" r="10488" b="4085" baseLine="4042" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="3907" r="6317" b="4046">K.</wd>

<space/>

<wd l="6418" t="3902" r="7056" b="4085">Gimpel,</wd>

<space/>

<wd l="7147" t="3907" r="7334" b="4046">N.</wd>

<space/>

<wd l="7440" t="3902" r="8270" b="4075">Schneider,</wd>

<space/>

<wd l="8366" t="3907" r="8534" b="4046">B.</wd>

<space/>

<wd l="8635" t="3907" r="9485" b="4075">O’Connor,</wd>

<space/>

<wd l="9581" t="3907" r="9763" b="4046">D.</wd>

<space/>

<wd l="9859" t="3907" r="10214" b="4075">Das,</wd>

<space/>

<wd l="10306" t="3907" r="10488" b="4046">D.</wd>

<space/>

</ln>

<ln l="6360" t="4133" r="10488" b="4315" baseLine="4272" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6360" t="4133" r="6826" b="4306">Mills,</wd>

<space/>

<wd l="6907" t="4138" r="7022" b="4277">J.</wd>

<space/>

<wd l="7109" t="4133" r="7973" b="4306">Eisenstein,</wd>

<space/>

<wd l="8050" t="4138" r="8266" b="4277">M.</wd>

<space/>

<wd l="8352" t="4133" r="9082" b="4306">Heilman,</wd>

<space/>

<wd l="9163" t="4138" r="9346" b="4277">D.</wd>

<space/>

<wd l="9427" t="4138" r="10291" b="4315">Yogatama,</wd>

<space/>

<wd l="10373" t="4138" r="10488" b="4277">J.</wd>

<space/>

</ln>

<ln l="6360" t="4358" r="10493" b="4541" baseLine="4498">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6360" t="4358" r="7104" b="4541">Flanigan,</wd>

<space/>

<wd l="7210" t="4358" r="7493" b="4502">and</wd>

<space/>

<wd l="7579" t="4363" r="7766" b="4502">N.</wd>

<space/>

<wd l="7867" t="4363" r="8050" b="4502">A.</wd>

<space/>

<wd l="8165" t="4358" r="8669" b="4502">Smith.</wd>

<space/>

<wd l="8770" t="4363" r="9211" b="4502">2011.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="9307" t="4358" r="10493" b="4541">Part-of-speech</wd>

<space/>

</run>

</ln>

<ln l="6365" t="4589" r="10498" b="4771" baseLine="4728" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6365" t="4598" r="6979" b="4771">tagging</wd>

<space/>

<wd l="7013" t="4589" r="7291" b="4771">for</wd>

<space/>

<wd l="7373" t="4594" r="7997" b="4733">Twitter:</wd>

<space/>

<wd l="8078" t="4594" r="9005" b="4757">Annotation,</wd>

<space/>

<wd l="9067" t="4589" r="9778" b="4771">features,</wd>

<space/>

<wd l="9878" t="4589" r="10186" b="4733">and</wd>

<space/>

<wd l="10262" t="4637" r="10498" b="4733">ex-</wd>

</ln>

<ln l="6336" t="4819" r="8568" b="5002" baseLine="4958">

<wd l="6336" t="4829" r="7190" b="5002"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">periments</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="7258" t="4824" r="7416" b="4958">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7464" t="4824" r="7886" b="4963">Proc.</wd>

<space/>

<wd l="7963" t="4819" r="8150" b="5002">of</wd>

<space/>

</run>

<wd l="8146" t="4824" r="8568" b="4963"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">ACL</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

<para l="6134" t="5170" r="10502" b="5813" alignment="justified" li="288" ri="72" spaceBefore="122" fli="-216" lsp="exactly" lspExact="230" language="en">

<ln l="6134" t="5170" r="10502" b="5314" baseLine="5309" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="5174" r="6307" b="5314">B.</wd>

<space/>

<wd l="6379" t="5174" r="6706" b="5314">Han</wd>

<space/>

<wd l="6778" t="5170" r="7061" b="5314">and</wd>

<space/>

<wd l="7123" t="5174" r="7282" b="5314">T.</wd>

<space/>

<wd l="7354" t="5170" r="8069" b="5314">Baldwin.</wd>

<space/>

<wd l="8141" t="5174" r="8578" b="5314">2011.</wd>

<space/>

<wd l="8654" t="5170" r="9336" b="5314">“Lexical</wd>

<space/>

<wd l="9398" t="5170" r="10502" b="5314">normalisation</wd>

<space/>

</ln>

<ln l="6365" t="5400" r="10502" b="5582" baseLine="5539" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6365" t="5400" r="6552" b="5544">of</wd>

<space/>

<wd l="6619" t="5400" r="7008" b="5544">short</wd>

<space/>

<wd l="7085" t="5419" r="7387" b="5544">text</wd>

<space/>

<wd l="7464" t="5448" r="8261" b="5582">messages:</wd>

<space/>

<wd l="8357" t="5400" r="8822" b="5544">Makn</wd>

<space/>

<wd l="8909" t="5448" r="9240" b="5544">sens</wd>

<space/>

<wd l="9331" t="5448" r="9418" b="5544">a</wd>

<space/>

<wd l="9494" t="5400" r="10243" b="5544">#twitter”.</wd>

<space/>

<wd l="10339" t="5405" r="10502" b="5539">In</wd>

<space/>

</ln>

<ln l="6355" t="5630" r="7464" b="5813" baseLine="5770">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6355" t="5635" r="6778" b="5774">Proc.</wd>

<space/>

<wd l="6854" t="5630" r="7046" b="5813">of</wd>

<space/>

</run>

<wd l="7042" t="5635" r="7464" b="5774"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">ACL</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

<para l="6134" t="5981" r="10498" b="6394" alignment="justified" li="288" ri="72" spaceBefore="120" fli="-216" lsp="exactly" lspExact="230" language="en">

<bullet type="ordered" numChars="3">

</bullet>

<ln l="6134" t="5981" r="10498" b="6163" baseLine="6120">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6134" t="5986" r="6437" b="6120">M.</wd>

<space/>

<wd l="6437" t="5981" r="7560" b="6163">Levandowsky</wd>

<space/>

<wd l="7637" t="5981" r="7925" b="6125">and</wd>

<space/>

<wd l="7997" t="5986" r="8179" b="6125">D.</wd>

<space/>

<wd l="8266" t="5981" r="8861" b="6125">Winter.</wd>

<space/>

<wd l="8966" t="5986" r="9384" b="6125">1971.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="9461" t="5986" r="10181" b="6125">Distance</wd>

<space/>

<wd l="10248" t="5981" r="10498" b="6125">be-</wd>

</run>

</ln>

<ln l="6365" t="6211" r="9110" b="6394" baseLine="6350">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6365" t="6235" r="6821" b="6355">tween</wd>

<space/>

</run>

<wd l="6874" t="6235" r="7219" b="6355"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">sets</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7267" t="6216" r="7834" b="6355">Nature</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7886" t="6216" r="8179" b="6355">234</wd>

<space/>

<wd l="8237" t="6211" r="8506" b="6394">(5):</wd>

<space/>

<wd l="8578" t="6216" r="9110" b="6355">34–35.</wd>

</run>

</ln>

</para>

<para l="6134" t="6562" r="10507" b="7205" alignment="justified" li="288" ri="72" spaceBefore="122" fli="-216" lsp="exactly" lspExact="230" language="en">

<ln l="6134" t="6562" r="10498" b="6744" baseLine="6701">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6134" t="6566" r="6317" b="6706">V.</wd>

<space/>

<wd l="6418" t="6562" r="7435" b="6706">Levenshtein.</wd>

<space/>

<wd l="7550" t="6566" r="7973" b="6706">1966.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="8069" t="6566" r="8616" b="6744">Binary</wd>

<space/>

<wd l="8698" t="6562" r="9139" b="6706">codes</wd>

<space/>

<wd l="9230" t="6562" r="9854" b="6744">capable</wd>

<space/>

<wd l="9941" t="6562" r="10133" b="6744">of</wd>

<space/>

<wd l="10171" t="6610" r="10498" b="6706">cor-</wd>

</run>

</ln>

<ln l="6360" t="6792" r="10507" b="6974" baseLine="6931">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6360" t="6802" r="6936" b="6974">recting</wd>

<space/>

<wd l="7003" t="6792" r="7757" b="6960">deletions,</wd>

<space/>

<wd l="7858" t="6802" r="8669" b="6960">insertions,</wd>

<space/>

<wd l="8770" t="6792" r="9077" b="6936">and</wd>

<space/>

</run>

<wd l="9144" t="6792" r="9912" b="6936"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">reversals</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="10013" t="6797" r="10507" b="6936">Soviet</wd>

<space/>

</run>

</ln>

<ln l="6355" t="7022" r="9077" b="7205" baseLine="7162">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6355" t="7022" r="6965" b="7205">Physics</wd>

<space/>

<wd l="7013" t="7022" r="7704" b="7205">Doklady</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7771" t="7027" r="7949" b="7166">10</wd>

<space/>

<wd l="8006" t="7022" r="8270" b="7205">(8):</wd>

<space/>

<wd l="8342" t="7027" r="9077" b="7166">707–710.</wd>

</run>

</ln>

</para>

<para l="6144" t="7368" r="10498" b="8472" alignment="justified" li="288" ri="72" spaceBefore="117" spaceAfter="30" fli="-216" lsp="exactly" lspExact="230" language="en">

<ln l="6144" t="7368" r="10488" b="7550" baseLine="7507" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6144" t="7373" r="6283" b="7512">S.</wd>

<space/>

<wd l="6346" t="7368" r="6878" b="7541">White,</wd>

<space/>

<wd l="6936" t="7373" r="7109" b="7512">R.</wd>

<space/>

<wd l="7171" t="7368" r="7867" b="7541">Johnson,</wd>

<space/>

<wd l="7934" t="7373" r="8074" b="7512">S.</wd>

<space/>

<wd l="8136" t="7368" r="9072" b="7550">Liversedge,</wd>

<space/>

<wd l="9125" t="7373" r="9312" b="7512">K.</wd>

<space/>

<wd l="9374" t="7373" r="9989" b="7550">Rayner.</wd>

<space/>

<wd l="10051" t="7373" r="10488" b="7512">2008.</wd>

<space/>

</ln>

<ln l="6355" t="7598" r="10483" b="7781" baseLine="7738" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="7603" r="6658" b="7781">Eye</wd>

<space/>

<wd l="6744" t="7603" r="7661" b="7742">Movements</wd>

<space/>

<wd l="7781" t="7598" r="8203" b="7742">When</wd>

<space/>

<wd l="8299" t="7598" r="8976" b="7781">Reading</wd>

<space/>

<wd l="9072" t="7598" r="10008" b="7781">Transposed</wd>

<space/>

<wd l="10099" t="7603" r="10483" b="7742">Text:</wd>

<space/>

</ln>

<ln l="6370" t="7829" r="10498" b="8011" baseLine="7968">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6370" t="7829" r="6658" b="7973">The</wd>

<space/>

<wd l="6730" t="7834" r="7656" b="8011">Importance</wd>

<space/>

<wd l="7738" t="7829" r="7930" b="8011">of</wd>

<space/>

<wd l="7992" t="7829" r="9302" b="8011">Word-Beginning</wd>

<space/>

</run>

<wd l="9365" t="7834" r="9970" b="7973"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Letters</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="10056" t="7834" r="10498" b="7973" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Jour-</wd>

</ln>

<ln l="6360" t="8059" r="10493" b="8242" baseLine="8198" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6360" t="8059" r="6624" b="8203">nal</wd>

<space/>

<wd l="6691" t="8059" r="6883" b="8242">of</wd>

<space/>

<wd l="6912" t="8059" r="7958" b="8242">experimental</wd>

<space/>

<wd l="7997" t="8059" r="8923" b="8242">psychology</wd>

<space/>

<wd l="8976" t="8064" r="9566" b="8203">Human</wd>

<space/>

<wd l="9619" t="8069" r="10493" b="8242">perception</wd>

<space/>

</ln>

<ln l="6360" t="8290" r="7771" b="8472" baseLine="8429">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6360" t="8290" r="6672" b="8434">and</wd>

<space/>

</run>

<wd l="6686" t="8290" r="7771" b="8472"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">performance</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

</column>

</section>

<dd l="5771" t="15736" r="6181" b="15977">

<para l="5804" t="15787" r="6148" b="15946" alignment="left" spaceBefore="4" lsp="exactly" lspExact="229" language="en">

<ln l="5870" t="15787" r="6082" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="27">

<wd l="5870" t="15792" r="6082" b="15946">92</wd>

</ln>

</para>

</dd>

</body>

</page>

</document>

