<?xml version="1.0" encoding="UTF-16"?>

<!--XML document generated using OCR technology from Nuance Communications, Inc.-->

<document xmlns="http://www.scansoft.com/omnipage/xml/ssdoc-schema3.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4312.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1411" marginTop="1440" marginRight="1407" marginBottom="358" offsetX="2" offsetY="-36" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1411" t="2240" r="10502" b="3132">

<column l="1411" t="2240" r="10502" b="3132">

<para l="2578" t="2304" r="9326" b="2558" alignment="centered" spaceBefore="8" spaceAfter="554" lsp="exactly" lspExact="322" language="en">

<ln l="2578" t="2304" r="9326" b="2558" baseLine="2496" bold="true" underlined="none" subsuperscript="none" fontSize="1400" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="2578" t="2304" r="3269" b="2506">Bekli:</wd>

<space/>

<wd l="3374" t="2309" r="3566" b="2501">A</wd>

<space/>

<wd l="3653" t="2304" r="4459" b="2558">Simple</wd>

<space/>

<wd l="4531" t="2309" r="5707" b="2558">Approach</wd>

<space/>

<wd l="5789" t="2323" r="6010" b="2506">to</wd>

<space/>

<wd l="6086" t="2304" r="6960" b="2506">Twitter</wd>

<space/>

<wd l="7032" t="2309" r="7546" b="2506">Text</wd>

<space/>

<wd l="7613" t="2304" r="9326" b="2510">Normalization</wd>

</ln>

</para>

</column>

</section>

<section l="1411" t="3132" r="10502" b="5272">

<column l="1411" t="3132" r="10502" b="5272">

<para l="4128" t="3192" r="7776" b="4205" alignment="centered" spaceAfter="1065" lsp="exactly" lspExact="268" language="en">

<ln l="5189" t="3192" r="6720" b="3398" baseLine="3346" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Courier New" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="5189" t="3192" r="5890" b="3355">Russell</wd>

<space/>

<wd l="5952" t="3192" r="6720" b="3398">Beckley
</wd>

</ln>

<ln l="4128" t="3461" r="7776" b="3672" baseLine="3619" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Courier New" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="4128" t="3461" r="4814" b="3672">Oregon</wd>

<space/>

<wd l="4872" t="3461" r="5491" b="3624">Health</wd>

<space/>

<wd l="5554" t="3461" r="5885" b="3624">and</wd>

<space/>

<wd l="5952" t="3461" r="6744" b="3624">Sciences</wd>

<space/>

<wd l="6811" t="3461" r="7776" b="3672">University
</wd>

</ln>

<ln l="5155" t="3734" r="6749" b="3946" baseLine="3888" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Courier New" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="5155" t="3734" r="5990" b="3926">Portland,</wd>

<space/>

<wd l="6062" t="3734" r="6749" b="3946">Oregon
</wd>

</ln>

<ln l="4776" t="4013" r="7128" b="4205" baseLine="4157" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Courier New" fontFamily="modern" fontPitch="fixed" spacing="0" forcedEOF="true">

<wd l="4776" t="4013" r="7128" b="4205">beckleyr@ohsu.edu</wd>

</ln>

</para>

</column>

</section>

<section l="1411" t="5272" r="10502" b="14764">

<column l="1411" t="5272" r="5822" b="14764">

<para l="3182" t="5328" r="4051" b="5496" alignment="centered" spaceBefore="8" lsp="exactly" lspExact="266" language="en">

<ln l="3182" t="5328" r="4051" b="5496" baseLine="5486" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="3182" t="5328" r="4051" b="5496">Abstract</wd>

</ln>

</para>

<para l="1838" t="5760" r="5390" b="8448" alignment="justified" li="432" ri="432" spaceBefore="177" lsp="exactly" lspExact="231" language="en">

<ln l="1838" t="5760" r="5381" b="5938" baseLine="5890" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1838" t="5765" r="2294" b="5938">Every</wd>

<space/>

<wd l="2371" t="5760" r="2669" b="5938">day,</wd>

<space/>

<wd l="2755" t="5760" r="3312" b="5899">Twitter</wd>

<space/>

<wd l="3379" t="5803" r="3768" b="5899">users</wd>

<space/>

<wd l="3854" t="5779" r="4502" b="5938">generate</wd>

<space/>

<wd l="4574" t="5779" r="4882" b="5899">vast</wd>

<space/>

<wd l="4949" t="5803" r="5381" b="5938">quan-</wd>

</ln>

<ln l="1838" t="5990" r="5390" b="6168" baseLine="6125" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1838" t="5990" r="2203" b="6130">tities</wd>

<space/>

<wd l="2285" t="5990" r="2458" b="6130">of</wd>

<space/>

<wd l="2510" t="5990" r="3341" b="6168">potentially</wd>

<space/>

<wd l="3408" t="5990" r="3878" b="6130">useful</wd>

<space/>

<wd l="3950" t="5990" r="4862" b="6130">information</wd>

<space/>

<wd l="4934" t="5990" r="5083" b="6125">in</wd>

<space/>

<wd l="5155" t="5990" r="5390" b="6130">the</wd>

<space/>

</ln>

<ln l="1838" t="6226" r="5381" b="6403" baseLine="6355" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1838" t="6226" r="2218" b="6365">form</wd>

<space/>

<wd l="2256" t="6226" r="2424" b="6365">of</wd>

<space/>

<wd l="2453" t="6226" r="2995" b="6365">written</wd>

<space/>

<wd l="3038" t="6226" r="3773" b="6403">language.</wd>

<space/>

<wd l="3840" t="6230" r="4162" b="6365">Due</wd>

<space/>

<wd l="4200" t="6245" r="4344" b="6365">to</wd>

<space/>

<wd l="4392" t="6226" r="5069" b="6365">Twitter’s</wd>

<space/>

<wd l="5112" t="6226" r="5381" b="6365">fre-</wd>

</ln>

<ln l="1838" t="6456" r="5386" b="6634" baseLine="6586" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1838" t="6456" r="2414" b="6634">quently</wd>

<space/>

<wd l="2477" t="6456" r="3139" b="6595">informal</wd>

<space/>

<wd l="3202" t="6475" r="3571" b="6619">tone,</wd>

<space/>

<wd l="3643" t="6475" r="3936" b="6595">text</wd>

<space/>

<wd l="3989" t="6456" r="5064" b="6595">normalization</wd>

<space/>

<wd l="5126" t="6499" r="5386" b="6595">can</wd>

<space/>

</ln>

<ln l="1838" t="6686" r="5381" b="6864" baseLine="6821" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1838" t="6686" r="2021" b="6826">be</wd>

<space/>

<wd l="2083" t="6730" r="2165" b="6826">a</wd>

<space/>

<wd l="2227" t="6686" r="2750" b="6826">crucial</wd>

<space/>

<wd l="2813" t="6686" r="3422" b="6826">element</wd>

<space/>

<wd l="3480" t="6686" r="3706" b="6826">for</wd>

<space/>

<wd l="3768" t="6686" r="4541" b="6864">exploiting</wd>

<space/>

<wd l="4603" t="6686" r="4896" b="6826">that</wd>

<space/>

<wd l="4954" t="6686" r="5381" b="6826">infor-</wd>

</ln>

<ln l="1838" t="6922" r="5386" b="7099" baseLine="7051" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1838" t="6922" r="2410" b="7061">mation.</wd>

<space/>

<wd l="2544" t="6922" r="2880" b="7061">This</wd>

<space/>

<wd l="2957" t="6965" r="3389" b="7099">paper</wd>

<space/>

<wd l="3461" t="6922" r="4061" b="7061">outlines</wd>

<space/>

<wd l="4142" t="6965" r="4397" b="7061">our</wd>

<space/>

<wd l="4469" t="6922" r="5170" b="7099">approach</wd>

<space/>

<wd l="5242" t="6941" r="5386" b="7061">to</wd>

<space/>

</ln>

<ln l="1838" t="7152" r="5390" b="7291" baseLine="7282" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1838" t="7171" r="2126" b="7291">text</wd>

<space/>

<wd l="2184" t="7152" r="3259" b="7291">normalization</wd>

<space/>

<wd l="3322" t="7152" r="3672" b="7291">used</wd>

<space/>

<wd l="3734" t="7152" r="3883" b="7286">in</wd>

<space/>

<wd l="3946" t="7152" r="4186" b="7291">the</wd>

<space/>

<wd l="4243" t="7157" r="4819" b="7291">WNUT</wd>

<space/>

<wd l="4896" t="7152" r="5390" b="7291">shared</wd>

<space/>

</ln>

<ln l="1838" t="7382" r="5381" b="7560" baseLine="7517" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1838" t="7382" r="2184" b="7522">task.</wd>

<space/>

<wd l="2448" t="7387" r="2698" b="7522">We</wd>

<space/>

<wd l="2818" t="7382" r="3206" b="7522">show</wd>

<space/>

<wd l="3326" t="7382" r="3619" b="7522">that</wd>

<space/>

<wd l="3730" t="7426" r="3811" b="7522">a</wd>

<space/>

<wd l="3922" t="7426" r="4262" b="7560">very</wd>

<space/>

<wd l="4387" t="7382" r="4891" b="7560">simple</wd>

<space/>

<wd l="5011" t="7382" r="5381" b="7522">solu-</wd>

</ln>

<ln l="1838" t="7613" r="5381" b="7790" baseLine="7747" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1838" t="7613" r="2174" b="7776">tion,</wd>

<space/>

<wd l="2256" t="7613" r="2914" b="7790">powered</wd>

<space/>

<wd l="2981" t="7613" r="3173" b="7790">by</wd>

<space/>

<wd l="3245" t="7656" r="3326" b="7752">a</wd>

<space/>

<wd l="3394" t="7613" r="4099" b="7790">modestly</wd>

<space/>

<wd l="4176" t="7613" r="4603" b="7776">sized,</wd>

<space/>

<wd l="4680" t="7613" r="5381" b="7790">partially-</wd>

</ln>

<ln l="1843" t="7848" r="5381" b="7987" baseLine="7978" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1843" t="7848" r="2405" b="7987">curated</wd>

<space/>

<wd l="2443" t="7848" r="4032" b="7987">wordlist—combined</wd>

<space/>

<wd l="4070" t="7848" r="4411" b="7987">with</wd>

<space/>

<wd l="4459" t="7891" r="4541" b="7987">a</wd>

<space/>

<wd l="4579" t="7848" r="5141" b="7987">modest</wd>

<space/>

<wd l="5174" t="7891" r="5381" b="7987">re-</wd>

</ln>

<ln l="1838" t="8078" r="5381" b="8256" baseLine="8213" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1838" t="8078" r="2419" b="8256">ranking</wd>

<space/>

<wd l="2515" t="8078" r="3538" b="8218">scheme—can</wd>

<space/>

<wd l="3629" t="8078" r="4152" b="8218">deliver</wd>

<space/>

<wd l="4229" t="8078" r="5098" b="8256">respectable</wd>

<space/>

<wd l="5174" t="8122" r="5381" b="8218">re-</wd>

</ln>

<ln l="1848" t="8309" r="2227" b="8448" baseLine="8443" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1848" t="8309" r="2227" b="8448">sults.</wd>

</ln>

</para>

<para l="1426" t="8856" r="3019" b="9024" alignment="left" spaceBefore="313" lsp="exactly" lspExact="266" language="en">

<ln l="1426" t="8856" r="3019" b="9024" baseLine="9014" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="7">

<wd l="1426" t="8856" r="1517" b="9019">1</wd>

<space/>

<wd l="1762" t="8856" r="3019" b="9024">Introduction</wd>

</ln>

</para>

<para l="1411" t="9264" r="5818" b="11290" alignment="justified" spaceBefore="128" lsp="exactly" lspExact="262" language="en">

<ln l="1411" t="9264" r="5813" b="9461" baseLine="9408" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1411" t="9264" r="2021" b="9418">Twitter</wd>

<space/>

<wd l="2093" t="9264" r="2232" b="9418">is</wd>

<space/>

<wd l="2318" t="9312" r="2506" b="9418">an</wd>

<space/>

<wd l="2587" t="9264" r="3394" b="9446">immense,</wd>

<space/>

<wd l="3485" t="9264" r="3970" b="9461">living</wd>

<space/>

<wd l="4056" t="9264" r="4886" b="9418">collection</wd>

<space/>

<wd l="4968" t="9264" r="5150" b="9418">of</wd>

<space/>

<wd l="5213" t="9264" r="5813" b="9418">written</wd>

<space/>

</ln>

<ln l="1411" t="9528" r="5803" b="9725" baseLine="9672" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1411" t="9528" r="2174" b="9725">language</wd>

<space/>

<wd l="2242" t="9528" r="2654" b="9682">from</wd>

<space/>

<wd l="2722" t="9528" r="2918" b="9682">all</wd>

<space/>

<wd l="2995" t="9576" r="3360" b="9682">over</wd>

<space/>

<wd l="3422" t="9528" r="3677" b="9682">the</wd>

<space/>

<wd l="3744" t="9528" r="4272" b="9682">world.</wd>

<space/>

<wd l="4382" t="9533" r="4882" b="9725">Every</wd>

<space/>

<wd l="4954" t="9528" r="5285" b="9725">day,</wd>

<space/>

<wd l="5362" t="9528" r="5803" b="9682">Twit-</wd>

</ln>

<ln l="1411" t="9758" r="5808" b="9989" baseLine="9934">

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1411" t="9816" r="1637" b="9946">ter</wd>

<space/>

<wd l="1699" t="9792" r="2491" b="9989">publishes</wd>

<space/>

<wd l="2563" t="9840" r="2654" b="9946">a</wd>

<space/>

<wd l="2726" t="9792" r="3590" b="9989">staggering</wd>

<space/>

<wd l="3662" t="9792" r="3970" b="9946">500</wd>

<space/>

<wd l="4037" t="9792" r="4651" b="9946">million</wd>

<space/>

</run>

<wd l="4714" t="9758" r="5376" b="9946"><run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">tweets</run>

<run underlined="none" subsuperscript="superscript" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">1</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="5482" t="9792" r="5808" b="9946">The</wd>

<space/>

</run>

</ln>

<ln l="1416" t="10051" r="5808" b="10248" baseLine="10200" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1416" t="10075" r="2040" b="10205">content</wd>

<space/>

<wd l="2136" t="10051" r="2318" b="10205">of</wd>

<space/>

<wd l="2405" t="10051" r="3014" b="10205">Twitter</wd>

<space/>

<wd l="3106" t="10051" r="3245" b="10205">is</wd>

<space/>

<wd l="3346" t="10051" r="4056" b="10248">virtually</wd>

<space/>

<wd l="4157" t="10051" r="5011" b="10234">unlimited,</wd>

<space/>

<wd l="5136" t="10051" r="5434" b="10205">and</wd>

<space/>

<wd l="5530" t="10051" r="5808" b="10205">has</wd>

<space/>

</ln>

<ln l="1411" t="10315" r="5803" b="10512" baseLine="10459" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1411" t="10363" r="1992" b="10512">proven</wd>

<space/>

<wd l="2040" t="10315" r="2558" b="10469">useful</wd>

<space/>

<wd l="2602" t="10315" r="2851" b="10469">for</wd>

<space/>

<wd l="2894" t="10315" r="3365" b="10469">much</wd>

<space/>

<wd l="3413" t="10315" r="4157" b="10498">research,</wd>

<space/>

<wd l="4214" t="10315" r="5011" b="10512">including</wd>

<space/>

<wd l="5064" t="10315" r="5803" b="10512">epidemi-</wd>

</ln>

<ln l="1416" t="10579" r="5818" b="10776" baseLine="10723" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1416" t="10579" r="1939" b="10776">ology:</wd>

<space/>

<wd l="2040" t="10579" r="2520" b="10733">Chew</wd>

<space/>

<wd l="2592" t="10579" r="2894" b="10733">and</wd>

<space/>

<wd l="2957" t="10579" r="3874" b="10776">Eysenbach</wd>

<space/>

<wd l="3941" t="10584" r="4546" b="10766">(2010);</wd>

<space/>

<wd l="4627" t="10579" r="4930" b="10733">and</wd>

<space/>

<wd l="5002" t="10579" r="5818" b="10733">sentiment</wd>

<space/>

</ln>

<ln l="1416" t="10843" r="5798" b="11040" baseLine="10987" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1416" t="10843" r="2136" b="11040">analysis:</wd>

<space/>

<wd l="2246" t="10843" r="2942" b="10997">Barbosa</wd>

<space/>

<wd l="3014" t="10843" r="3312" b="10997">and</wd>

<space/>

<wd l="3379" t="10848" r="3797" b="11040">Feng</wd>

<space/>

<wd l="3878" t="10848" r="4478" b="11030">(2010),</wd>

<space/>

<wd l="4560" t="10843" r="5318" b="10997">Bakliwal</wd>

<space/>

<wd l="5390" t="10867" r="5544" b="10997">et</wd>

<space/>

<wd l="5611" t="10843" r="5798" b="10997">al.</wd>

<space/>

</ln>

<ln l="1421" t="11102" r="5424" b="11290" baseLine="11251" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="11107" r="2021" b="11290">(2013),</wd>

<space/>

<wd l="2083" t="11102" r="2928" b="11256">Rosenthal</wd>

<space/>

<wd l="2986" t="11126" r="3139" b="11256">et</wd>

<space/>

<wd l="3192" t="11102" r="3379" b="11256">al.</wd>

<space/>

<wd l="3456" t="11102" r="4056" b="11290">(2015),</wd>

<space/>

<wd l="4118" t="11102" r="4306" b="11251">Li</wd>

<space/>

<wd l="4363" t="11126" r="4517" b="11256">et</wd>

<space/>

<wd l="4570" t="11102" r="4757" b="11256">al.</wd>

<space/>

<wd l="4834" t="11107" r="5424" b="11290">(2014).</wd>

</ln>

</para>

<para l="1411" t="11366" r="5818" b="14194" alignment="justified" spaceAfter="74" fli="216" lsp="exactly" lspExact="262" language="en">

<ln l="1622" t="11366" r="5803" b="11563" baseLine="11515" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1622" t="11371" r="1757" b="11520">It</wd>

<space/>

<wd l="1810" t="11366" r="2338" b="11520">would</wd>

<space/>

<wd l="2390" t="11366" r="2741" b="11520">take</wd>

<space/>

<wd l="2798" t="11414" r="3259" b="11563">many</wd>

<space/>

<wd l="3322" t="11366" r="3926" b="11520">readers</wd>

<space/>

<wd l="3989" t="11390" r="4147" b="11520">to</wd>

<space/>

<wd l="4210" t="11366" r="4598" b="11563">keep</wd>

<space/>

<wd l="4661" t="11414" r="4867" b="11563">up</wd>

<space/>

<wd l="4930" t="11366" r="5304" b="11520">with</wd>

<space/>

<wd l="5362" t="11366" r="5803" b="11520">Twit-</wd>

</ln>

<ln l="1411" t="11630" r="5803" b="11827" baseLine="11774" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1411" t="11635" r="1771" b="11784">ter’s</wd>

<space/>

<wd l="1862" t="11654" r="2443" b="11827">output,</wd>

<space/>

<wd l="2544" t="11630" r="2851" b="11813">but,</wd>

<space/>

<wd l="2952" t="11630" r="3912" b="11827">fortunately,</wd>

<space/>

<wd l="4008" t="11678" r="4253" b="11784">we</wd>

<space/>

<wd l="4339" t="11630" r="4728" b="11784">have</wd>

<space/>

<wd l="4814" t="11630" r="5400" b="11784">natural</wd>

<space/>

<wd l="5482" t="11630" r="5803" b="11784">lan-</wd>

</ln>

<ln l="1416" t="11894" r="5803" b="12091" baseLine="12038" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="11942" r="1915" b="12091">guage</wd>

<space/>

<wd l="1978" t="11894" r="2880" b="12091">processing</wd>

<space/>

<wd l="2957" t="11899" r="3480" b="12082">(NLP)</wd>

<space/>

<wd l="3547" t="11894" r="4262" b="12048">methods</wd>

<space/>

<wd l="4325" t="11894" r="4646" b="12048">that</wd>

<space/>

<wd l="4709" t="11942" r="4992" b="12048">can</wd>

<space/>

<wd l="5064" t="11918" r="5803" b="12048">automat-</wd>

</ln>

<ln l="1411" t="12154" r="5818" b="12350" baseLine="12302" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1411" t="12154" r="1877" b="12350">ically</wd>

<space/>

<wd l="1968" t="12154" r="2395" b="12336">filter,</wd>

<space/>

<wd l="2496" t="12154" r="3322" b="12336">condense,</wd>

<space/>

<wd l="3422" t="12202" r="3595" b="12307">or</wd>

<space/>

<wd l="3677" t="12178" r="4248" b="12307">extract</wd>

<space/>

<wd l="4325" t="12154" r="5323" b="12307">information</wd>

<space/>

<wd l="5400" t="12154" r="5818" b="12307">from</wd>

<space/>

</ln>

<ln l="1411" t="12418" r="5813" b="12614" baseLine="12566" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1411" t="12442" r="1766" b="12571">text.</wd>

<space/>

<wd l="1843" t="12422" r="2640" b="12600">However,</wd>

<space/>

<wd l="2698" t="12422" r="3091" b="12571">NLP</wd>

<space/>

<wd l="3144" t="12418" r="4085" b="12614">approaches</wd>

<space/>

<wd l="4142" t="12466" r="4392" b="12571">are</wd>

<space/>

<wd l="4440" t="12418" r="5174" b="12614">typically</wd>

<space/>

<wd l="5227" t="12418" r="5813" b="12571">trained</wd>

<space/>

</ln>

<ln l="1416" t="12682" r="5803" b="12878" baseLine="12826" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="12730" r="1622" b="12835">on</wd>

<space/>

<wd l="1704" t="12682" r="2266" b="12835">formal</wd>

<space/>

<wd l="2352" t="12682" r="2866" b="12835">edited</wd>

<space/>

<wd l="2942" t="12706" r="3302" b="12864">text,</wd>

<space/>

<wd l="3403" t="12682" r="3701" b="12835">and</wd>

<space/>

<wd l="3792" t="12682" r="4459" b="12878">struggle</wd>

<space/>

<wd l="4541" t="12682" r="4920" b="12835">with</wd>

<space/>

<wd l="4997" t="12682" r="5251" b="12835">the</wd>

<space/>

<wd l="5338" t="12682" r="5803" b="12835">infor-</wd>

</ln>

<ln l="1411" t="12946" r="5803" b="13128" baseLine="13090" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1411" t="12946" r="1776" b="13128">mal,</wd>

<space/>

<wd l="1867" t="12946" r="2597" b="13099">unedited</wd>

<space/>

<wd l="2674" t="12970" r="2990" b="13099">text</wd>

<space/>

<wd l="3067" t="12946" r="3254" b="13099">of</wd>

<space/>

<wd l="3317" t="12946" r="3950" b="13099">Twitter.</wd>

<space/>

<wd l="4104" t="12950" r="4416" b="13099">But</wd>

<space/>

<wd l="4488" t="12946" r="4910" b="13099">there</wd>

<space/>

<wd l="4987" t="12946" r="5122" b="13099">is</wd>

<space/>

<wd l="5213" t="12994" r="5304" b="13099">a</wd>

<space/>

<wd l="5376" t="12946" r="5803" b="13099">well-</wd>

</ln>

<ln l="1411" t="13205" r="5803" b="13402" baseLine="13354">

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1411" t="13205" r="1982" b="13358">known</wd>

<space/>

<wd l="2059" t="13253" r="2405" b="13402">way</wd>

<space/>

<wd l="2486" t="13229" r="2645" b="13358">to</wd>

<space/>

<wd l="2726" t="13205" r="3418" b="13402">mitigate</wd>

<space/>

<wd l="3494" t="13205" r="3797" b="13358">this</wd>

<space/>

<wd l="3878" t="13205" r="4627" b="13402">problem:</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="4757" t="13234" r="5064" b="13358">text</wd>

<space/>

<wd l="5131" t="13205" r="5803" b="13358">normal-</wd>

</run>

</ln>

<ln l="1421" t="13469" r="5813" b="13666" baseLine="13618">

<wd l="1421" t="13478" r="2030" b="13651"><run italic="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">ization</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="2093" t="13469" r="2338" b="13622">i.e.</wd>

<space/>

<wd l="2414" t="13469" r="3197" b="13666">replacing</wd>

<space/>

<wd l="3250" t="13469" r="4354" b="13622">non-standard</wd>

<space/>

<wd l="4402" t="13469" r="4949" b="13622">tokens</wd>

<space/>

<wd l="5002" t="13469" r="5376" b="13622">with</wd>

<space/>

<wd l="5424" t="13469" r="5813" b="13622">their</wd>

<space/>

</run>

</ln>

<ln l="1421" t="13733" r="5808" b="13930" baseLine="13877" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="13733" r="2131" b="13886">standard</wd>

<space/>

<wd l="2189" t="13733" r="3182" b="13930">equivalents,</wd>

<space/>

<wd l="3250" t="13733" r="3936" b="13930">yielding</wd>

<space/>

<wd l="3998" t="13757" r="4315" b="13886">text</wd>

<space/>

<wd l="4368" t="13733" r="4685" b="13886">that</wd>

<space/>

<wd l="4738" t="13733" r="5069" b="13886">will</wd>

<space/>

<wd l="5122" t="13733" r="5318" b="13886">be</wd>

<space/>

<wd l="5376" t="13781" r="5808" b="13886">more</wd>

<space/>

</ln>

<ln l="1416" t="13997" r="2909" b="14194" baseLine="14141" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="13997" r="2222" b="14194">agreeable</wd>

<space/>

<wd l="2275" t="14021" r="2438" b="14150">to</wd>

<space/>

<wd l="2496" t="14002" r="2909" b="14150">NLP.</wd>

</ln>

</para>

<rulerline l="1411" t="14285" r="2578" b="14285" type="single" width="10" color="000000"/>

<para l="1411" t="14338" r="5453" b="14712" alignment="left" ri="360" spaceBefore="44" spaceAfter="11" fli="216" lsp="exactly" lspExact="213" language="en">

<ln l="1670" t="14338" r="5453" b="14530" baseLine="14489">

<wd l="1670" t="14338" r="5453" b="14530"><run underlined="none" subsuperscript="superscript" fontSize="650" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">1</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">https://blog.twitter.com/2013/new-tweets-per-second-</run>

</wd>

</ln>

<ln l="1411" t="14587" r="2520" b="14712" baseLine="14702" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="14587" r="2520" b="14712">record-and-how</wd>

</ln>

</para>

</column>

<column l="6091" t="5272" r="10502" b="14764">

<para l="6091" t="5338" r="10493" b="8957" alignment="justified" spaceBefore="5" fli="216" lsp="exactly" lspExact="263" language="en">

<ln l="6307" t="5338" r="10488" b="5539" baseLine="5486" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6307" t="5347" r="6653" b="5496">One</wd>

<space/>

<wd l="6725" t="5342" r="7205" b="5496">flavor</wd>

<space/>

<wd l="7272" t="5342" r="7454" b="5496">of</wd>

<space/>

<wd l="7502" t="5342" r="8611" b="5496">non-standard</wd>

<space/>

<wd l="8674" t="5342" r="9907" b="5539">writing—what</wd>

<space/>

<wd l="9970" t="5347" r="10037" b="5491">I</wd>

<space/>

<wd l="10099" t="5342" r="10488" b="5496">have</wd>

<space/>

</ln>

<ln l="6091" t="5597" r="10493" b="5798" baseLine="5750" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6091" t="5602" r="6974" b="5798">previously</wd>

<space/>

<wd l="7066" t="5602" r="7723" b="5755">focused</wd>

<space/>

<wd l="7814" t="5602" r="8371" b="5755">on—is</wd>

<space/>

<wd l="8462" t="5602" r="8875" b="5755">what</wd>

<space/>

<wd l="8962" t="5606" r="9029" b="5750">I</wd>

<space/>

<wd l="9120" t="5602" r="9422" b="5755">call</wd>

<space/>

<wd l="9514" t="5602" r="10493" b="5755">“vernacular</wd>

<space/>

</ln>

<ln l="6096" t="5861" r="10488" b="6062" baseLine="6014" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6096" t="5866" r="7214" b="6062">orthography”</wd>

<space/>

<wd l="7330" t="5870" r="7795" b="6053">(VO).</wd>

<space/>

<wd l="7910" t="5870" r="8203" b="6019">VO</wd>

<space/>

<wd l="8309" t="5866" r="8443" b="6019">is</wd>

<space/>

<wd l="8563" t="5866" r="9216" b="6062">spelling</wd>

<space/>

<wd l="9322" t="5866" r="9643" b="6019">that</wd>

<space/>

<wd l="9739" t="5866" r="10488" b="6019">indicates</wd>

<space/>

</ln>

<ln l="6091" t="6125" r="10493" b="6326" baseLine="6274" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6091" t="6130" r="6893" b="6283">intentional</wd>

<space/>

<wd l="6941" t="6130" r="8045" b="6283">non-standard</wd>

<space/>

<wd l="8093" t="6130" r="9312" b="6326">pronunciation,</wd>

<space/>

<wd l="9384" t="6130" r="9758" b="6283">such</wd>

<space/>

<wd l="9816" t="6178" r="9979" b="6283">as</wd>

<space/>

<wd l="10032" t="6130" r="10493" b="6283">when</wd>

<space/>

</ln>

<ln l="6091" t="6389" r="10483" b="6590" baseLine="6538" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6091" t="6394" r="6350" b="6547">the</wd>

<space/>

<wd l="6461" t="6394" r="6931" b="6590">string</wd>

<space/>

<wd l="7042" t="6394" r="7474" b="6547">“dat”</wd>

<space/>

<wd l="7594" t="6394" r="8107" b="6547">stands</wd>

<space/>

<wd l="8213" t="6394" r="8376" b="6542">in</wd>

<space/>

<wd l="8477" t="6394" r="8726" b="6547">for</wd>

<space/>

<wd l="8832" t="6394" r="9370" b="6547">“that”.</wd>

<space/>

<wd l="9590" t="6394" r="10109" b="6547">While</wd>

<space/>

<wd l="10210" t="6442" r="10483" b="6547">nu-</wd>

</ln>

<ln l="6091" t="6648" r="10493" b="6850" baseLine="6802" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6091" t="6701" r="6710" b="6806">merous</wd>

<space/>

<wd l="6768" t="6701" r="7315" b="6850">papers</wd>

<space/>

<wd l="7378" t="6653" r="7781" b="6806">offer</wd>

<space/>

<wd l="7843" t="6653" r="8592" b="6806">solutions</wd>

<space/>

<wd l="8650" t="6653" r="8899" b="6806">for</wd>

<space/>

<wd l="8947" t="6677" r="9269" b="6806">text</wd>

<space/>

<wd l="9317" t="6653" r="10493" b="6806">normalization</wd>

<space/>

</ln>

<ln l="6101" t="6912" r="10493" b="7114" baseLine="7066" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6101" t="6922" r="6456" b="7114">(e.g.</wd>

<space/>

<wd l="6547" t="6922" r="6893" b="7070">Han</wd>

<space/>

<wd l="6960" t="6917" r="7262" b="7070">and</wd>

<space/>

<wd l="7315" t="6917" r="8035" b="7070">Baldwin</wd>

<space/>

<wd l="8102" t="6922" r="8698" b="7104">(2011),</wd>

<space/>

<wd l="8770" t="6922" r="9197" b="7114">Yang</wd>

<space/>

<wd l="9264" t="6917" r="9566" b="7070">and</wd>

<space/>

<wd l="9624" t="6917" r="10493" b="7070">Eisenstein</wd>

<space/>

</ln>

<ln l="6101" t="7176" r="10483" b="7378" baseLine="7325" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6101" t="7186" r="6701" b="7368">(2013),</wd>

<space/>

<wd l="6811" t="7181" r="7344" b="7378">Zhang</wd>

<space/>

<wd l="7445" t="7205" r="7594" b="7334">et</wd>

<space/>

<wd l="7685" t="7181" r="7872" b="7334">al.</wd>

<space/>

<wd l="7987" t="7186" r="8587" b="7368">(2013),</wd>

<space/>

<wd l="8698" t="7186" r="9250" b="7378">Sproat</wd>

<space/>

<wd l="9341" t="7205" r="9494" b="7334">et</wd>

<space/>

<wd l="9586" t="7181" r="9773" b="7334">al.</wd>

<space/>

<wd l="9883" t="7186" r="10483" b="7368">(2001),</wd>

<space/>

</ln>

<ln l="6091" t="7440" r="10493" b="7632" baseLine="7589" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6091" t="7445" r="6278" b="7594">Li</wd>

<space/>

<wd l="6360" t="7445" r="6662" b="7598">and</wd>

<space/>

<wd l="6739" t="7445" r="7032" b="7598">Liu</wd>

<space/>

<wd l="7118" t="7450" r="7786" b="7632">(2014)),</wd>

<space/>

<wd l="7882" t="7445" r="8184" b="7598">and</wd>

<space/>

<wd l="8266" t="7493" r="8357" b="7598">a</wd>

<space/>

<wd l="8429" t="7445" r="8736" b="7598">few</wd>

<space/>

<wd l="8818" t="7445" r="9245" b="7598">build</wd>

<space/>

<wd l="9322" t="7445" r="9931" b="7598">models</wd>

<space/>

<wd l="10013" t="7445" r="10493" b="7598">based</wd>

<space/>

</ln>

<ln l="6096" t="7699" r="10483" b="7901" baseLine="7853" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6096" t="7752" r="6302" b="7858">on</wd>

<space/>

<wd l="6389" t="7704" r="7219" b="7901">phonemic</wd>

<space/>

<wd l="7315" t="7704" r="8112" b="7901">similarity</wd>

<space/>

<wd l="8208" t="7709" r="8558" b="7901">(e.g.</wd>

<space/>

<wd l="8731" t="7704" r="9264" b="7858">Kobus</wd>

<space/>

<wd l="9355" t="7728" r="9509" b="7858">et</wd>

<space/>

<wd l="9590" t="7704" r="9778" b="7858">al.</wd>

<space/>

<wd l="9883" t="7709" r="10483" b="7891">(2008),</wd>

<space/>

</ln>

<ln l="6096" t="7963" r="10488" b="8165" baseLine="8117" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6096" t="7968" r="7042" b="8165">Choudhury</wd>

<space/>

<wd l="7138" t="7992" r="7286" b="8122">et</wd>

<space/>

<wd l="7378" t="7968" r="7565" b="8122">al.</wd>

<space/>

<wd l="7675" t="7973" r="8342" b="8155">(2007)),</wd>

<space/>

<wd l="8448" t="8016" r="8856" b="8122">none</wd>

<space/>

<wd l="8947" t="7992" r="9106" b="8122">to</wd>

<space/>

<wd l="9206" t="8016" r="9485" b="8122">our</wd>

<space/>

<wd l="9571" t="7968" r="10488" b="8165">knowledge</wd>

<space/>

</ln>

<ln l="6091" t="8227" r="10483" b="8429" baseLine="8376" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6091" t="8232" r="6480" b="8386">have</wd>

<space/>

<wd l="6533" t="8232" r="7363" b="8386">addressed</wd>

<space/>

<wd l="7406" t="8237" r="7699" b="8386">VO</wd>

<space/>

<wd l="7747" t="8232" r="7915" b="8381">in</wd>

<space/>

<wd l="7958" t="8232" r="8794" b="8429">particular.</wd>

<space/>

<wd l="8870" t="8232" r="9245" b="8386">This</wd>

<space/>

<wd l="9293" t="8280" r="9797" b="8429">paper,</wd>

<space/>

<wd l="9854" t="8256" r="10166" b="8414">too,</wd>

<space/>

<wd l="10229" t="8232" r="10483" b="8386">ad-</wd>

</ln>

<ln l="6096" t="8491" r="10488" b="8693" baseLine="8640" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6096" t="8496" r="6696" b="8650">dresses</wd>

<space/>

<wd l="6758" t="8496" r="7018" b="8650">the</wd>

<space/>

<wd l="7080" t="8496" r="7699" b="8693">general</wd>

<space/>

<wd l="7757" t="8496" r="8933" b="8650">normalization</wd>

<space/>

<wd l="8990" t="8496" r="9739" b="8693">problem,</wd>

<space/>

<wd l="9806" t="8496" r="10075" b="8650">but</wd>

<space/>

<wd l="10128" t="8544" r="10488" b="8650">uses</wd>

<space/>

</ln>

<ln l="6091" t="8755" r="9845" b="8957" baseLine="8904" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6091" t="8760" r="6696" b="8914">lessons</wd>

<space/>

<wd l="6758" t="8760" r="7378" b="8914">learned</wd>

<space/>

<wd l="7435" t="8760" r="8333" b="8957">attempting</wd>

<space/>

<wd l="8390" t="8784" r="8549" b="8914">to</wd>

<space/>

<wd l="8606" t="8760" r="9451" b="8914">normalize</wd>

<space/>

<wd l="9509" t="8765" r="9845" b="8914">VO.</wd>

</ln>

</para>

<para l="6091" t="9187" r="8458" b="9398" alignment="left" spaceBefore="185" lsp="exactly" lspExact="266" language="en">

<ln l="6091" t="9187" r="8458" b="9398" baseLine="9350" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="4">

<wd l="6091" t="9187" r="6206" b="9350">2</wd>

<space/>

<wd l="6446" t="9187" r="7147" b="9398">System</wd>

<space/>

<wd l="7205" t="9187" r="8458" b="9355">Architecture</wd>

</ln>

</para>

<para l="6091" t="9590" r="10483" b="10843" alignment="justified" spaceBefore="120" lsp="exactly" lspExact="263" language="en">

<ln l="6091" t="9590" r="10483" b="9792" baseLine="9739" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="9595" r="6418" b="9749">The</wd>

<space/>

<wd l="6490" t="9595" r="7478" b="9749">architecture</wd>

<space/>

<wd l="7550" t="9595" r="7733" b="9749">of</wd>

<space/>

<wd l="7781" t="9595" r="8083" b="9749">this</wd>

<space/>

<wd l="8160" t="9619" r="8741" b="9792">system</wd>

<space/>

<wd l="8798" t="9595" r="8933" b="9749">is</wd>

<space/>

<wd l="9000" t="9643" r="9370" b="9792">very</wd>

<space/>

<wd l="9446" t="9595" r="10042" b="9792">simple,</wd>

<space/>

<wd l="10123" t="9643" r="10483" b="9749">con-</wd>

</ln>

<ln l="6101" t="9850" r="10483" b="10051" baseLine="10003" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6101" t="9854" r="6638" b="10051">sisting</wd>

<space/>

<wd l="6701" t="9854" r="6883" b="10008">of</wd>

<space/>

<wd l="6922" t="9854" r="7344" b="10008">three</wd>

<space/>

<wd l="7397" t="9854" r="7819" b="10008">main</wd>

<space/>

<wd l="7867" t="9883" r="8323" b="10051">parts:</wd>

<space/>

<wd l="8414" t="9859" r="8640" b="10042">(1)</wd>

<space/>

<wd l="8707" t="9902" r="8798" b="10008">a</wd>

<space/>

<wd l="8856" t="9854" r="9830" b="10008">substitution</wd>

<space/>

<wd l="9883" t="9854" r="10186" b="10037">list,</wd>

<space/>

<wd l="10253" t="9859" r="10483" b="10042">(2)</wd>

<space/>

</ln>

<ln l="6096" t="10114" r="10483" b="10315" baseLine="10262" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6096" t="10166" r="6187" b="10272">a</wd>

<space/>

<wd l="6264" t="10118" r="6821" b="10315">couple</wd>

<space/>

<wd l="6907" t="10118" r="7090" b="10272">of</wd>

<space/>

<wd l="7152" t="10118" r="7478" b="10272">rule</wd>

<space/>

<wd l="7555" t="10118" r="8035" b="10272">based</wd>

<space/>

<wd l="8117" t="10142" r="9178" b="10315">components,</wd>

<space/>

<wd l="9274" t="10118" r="9576" b="10272">and</wd>

<space/>

<wd l="9658" t="10123" r="9888" b="10306">(3)</wd>

<space/>

<wd l="9974" t="10166" r="10066" b="10272">a</wd>

<space/>

<wd l="10147" t="10166" r="10483" b="10272">sen-</wd>

</ln>

<ln l="6091" t="10378" r="10483" b="10579" baseLine="10526" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="10406" r="6538" b="10536">tence</wd>

<space/>

<wd l="6614" t="10382" r="7018" b="10536">level</wd>

<space/>

<wd l="7090" t="10382" r="7891" b="10536">re-ranker.</wd>

<space/>

<wd l="8035" t="10382" r="8410" b="10536">This</wd>

<space/>

<wd l="8486" t="10382" r="9206" b="10579">provides</wd>

<space/>

<wd l="9288" t="10382" r="9538" b="10536">for</wd>

<space/>

<wd l="9614" t="10430" r="9706" b="10536">a</wd>

<space/>

<wd l="9778" t="10382" r="10085" b="10536">fast</wd>

<space/>

<wd l="10157" t="10430" r="10483" b="10579">per-</wd>

</ln>

<ln l="6091" t="10642" r="7723" b="10843" baseLine="10790" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="10646" r="6562" b="10800">token</wd>

<space/>

<wd l="6614" t="10646" r="7723" b="10843">performance.</wd>

</ln>

</para>

<para l="6091" t="11059" r="8083" b="11218" alignment="left" spaceBefore="175" lsp="exactly" lspExact="245" language="en">

<ln l="6091" t="11059" r="8083" b="11218" baseLine="11208" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="4">

<wd l="6091" t="11064" r="6346" b="11218">2.1</wd>

<space/>

<wd l="6571" t="11064" r="7670" b="11218">Substitution</wd>

<space/>

<wd l="7728" t="11064" r="8083" b="11218">List</wd>

</ln>

</para>

<para l="6091" t="11395" r="10498" b="13397" alignment="justified" spaceBefore="73" lsp="exactly" lspExact="263" language="en">

<ln l="6091" t="11395" r="10483" b="11597" baseLine="11549" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="11405" r="6533" b="11554">Most</wd>

<space/>

<wd l="6586" t="11400" r="6773" b="11554">of</wd>

<space/>

<wd l="6816" t="11400" r="7070" b="11554">the</wd>

<space/>

<wd l="7128" t="11400" r="7565" b="11554">work</wd>

<space/>

<wd l="7618" t="11400" r="7752" b="11554">is</wd>

<space/>

<wd l="7819" t="11400" r="8222" b="11554">done</wd>

<space/>

<wd l="8280" t="11400" r="8486" b="11597">by</wd>

<space/>

<wd l="8554" t="11448" r="8645" b="11554">a</wd>

<space/>

<wd l="8707" t="11400" r="10070" b="11597">semi-supervised</wd>

<space/>

<wd l="10138" t="11400" r="10483" b="11554">sub-</wd>

</ln>

<ln l="6101" t="11659" r="10498" b="11861" baseLine="11813" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6101" t="11664" r="6787" b="11818">stitution</wd>

<space/>

<wd l="6869" t="11664" r="7133" b="11818">list</wd>

<space/>

<wd l="7214" t="11664" r="8064" b="11861">consisting</wd>

<space/>

<wd l="8155" t="11664" r="8338" b="11818">of</wd>

<space/>

<wd l="8414" t="11664" r="9053" b="11818">ordered</wd>

<space/>

<wd l="9134" t="11664" r="9586" b="11861">pairs.</wd>

<space/>

<wd l="9754" t="11664" r="10080" b="11818">The</wd>

<space/>

<wd l="10166" t="11664" r="10498" b="11818">first</wd>

<space/>

</ln>

<ln l="6091" t="11923" r="10483" b="12125" baseLine="12072" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="11928" r="6787" b="12082">member</wd>

<space/>

<wd l="6854" t="11928" r="7037" b="12082">of</wd>

<space/>

<wd l="7094" t="11928" r="7474" b="12082">each</wd>

<space/>

<wd l="7541" t="11928" r="7872" b="12125">pair</wd>

<space/>

<wd l="7930" t="11928" r="8069" b="12082">is</wd>

<space/>

<wd l="8141" t="11976" r="8232" b="12082">a</wd>

<space/>

<wd l="8304" t="11928" r="8770" b="12125">string</wd>

<space/>

<wd l="8837" t="11928" r="9878" b="12125">representing</wd>

<space/>

<wd l="9950" t="11976" r="10042" b="12082">a</wd>

<space/>

<wd l="10104" t="11976" r="10483" b="12082">non-</wd>

</ln>

<ln l="6101" t="12187" r="10488" b="12389" baseLine="12336" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6101" t="12192" r="6811" b="12346">standard</wd>

<space/>

<wd l="6874" t="12192" r="7344" b="12346">word.</wd>

<space/>

<wd l="7459" t="12192" r="7786" b="12346">The</wd>

<space/>

<wd l="7862" t="12192" r="8443" b="12346">second</wd>

<space/>

<wd l="8506" t="12192" r="9202" b="12346">member</wd>

<space/>

<wd l="9269" t="12192" r="9451" b="12346">of</wd>

<space/>

<wd l="9509" t="12192" r="9893" b="12346">each</wd>

<space/>

<wd l="9955" t="12192" r="10286" b="12389">pair</wd>

<space/>

<wd l="10354" t="12192" r="10488" b="12346">is</wd>

<space/>

</ln>

<ln l="6096" t="12446" r="10488" b="12648" baseLine="12600" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6096" t="12499" r="6187" b="12605">a</wd>

<space/>

<wd l="6240" t="12451" r="6504" b="12605">list</wd>

<space/>

<wd l="6562" t="12451" r="6744" b="12605">of</wd>

<space/>

<wd l="6797" t="12451" r="7349" b="12648">strings</wd>

<space/>

<wd l="7406" t="12451" r="8448" b="12648">representing</wd>

<space/>

<wd l="8515" t="12451" r="9317" b="12605">candidate</wd>

<space/>

<wd l="9374" t="12451" r="10488" b="12648">replacements</wd>

<space/>

</ln>

<ln l="6091" t="12710" r="10483" b="12912" baseLine="12864" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="12715" r="6341" b="12869">for</wd>

<space/>

<wd l="6403" t="12715" r="6662" b="12869">the</wd>

<space/>

<wd l="6730" t="12715" r="7200" b="12869">word.</wd>

<space/>

<wd l="7320" t="12720" r="7613" b="12869">For</wd>

<space/>

<wd l="7680" t="12715" r="8434" b="12912">example,</wd>

<space/>

<wd l="8515" t="12763" r="8813" b="12869">one</wd>

<space/>

<wd l="8880" t="12715" r="9211" b="12912">pair</wd>

<space/>

<wd l="9278" t="12715" r="9413" b="12869">is</wd>

<space/>

<wd l="9494" t="12720" r="9893" b="12902">(“n”,</wd>

<space/>

<wd l="9979" t="12715" r="10483" b="12902">(“and,</wd>

<space/>

</ln>

<ln l="6091" t="12974" r="10488" b="13176" baseLine="13123" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="12979" r="6533" b="13166">in”)).</wd>

<space/>

<wd l="6653" t="12979" r="7142" b="13133">There</wd>

<space/>

<wd l="7219" t="13027" r="7469" b="13133">are</wd>

<space/>

<wd l="7522" t="12979" r="7848" b="13176">just</wd>

<space/>

<wd l="7915" t="13027" r="8285" b="13133">over</wd>

<space/>

<wd l="8347" t="12979" r="8923" b="13162">45,000</wd>

<space/>

<wd l="8995" t="12979" r="9451" b="13176">pairs,</wd>

<space/>

<wd l="9533" t="12979" r="10046" b="13133">where</wd>

<space/>

<wd l="10123" t="12979" r="10488" b="13176">only</wd>

<space/>

</ln>

<ln l="6091" t="13243" r="8717" b="13397" baseLine="13387" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="13243" r="6350" b="13397">the</wd>

<space/>

<wd l="6408" t="13243" r="6739" b="13397">first</wd>

<space/>

<wd l="6792" t="13248" r="7205" b="13397">2000</wd>

<space/>

<wd l="7267" t="13291" r="7517" b="13397">are</wd>

<space/>

<wd l="7574" t="13243" r="8717" b="13397">hand-curated.</wd>

</ln>

</para>

<para l="6091" t="13502" r="10498" b="14755" alignment="justified" spaceAfter="3" fli="216" lsp="exactly" lspExact="263" language="en">

<ln l="6302" t="13502" r="10488" b="13690" baseLine="13651" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6302" t="13512" r="6518" b="13661">To</wd>

<space/>

<wd l="6605" t="13531" r="7104" b="13661">create</wd>

<space/>

<wd l="7186" t="13507" r="7440" b="13661">the</wd>

<space/>

<wd l="7522" t="13507" r="7824" b="13690">list,</wd>

<space/>

<wd l="7920" t="13555" r="8165" b="13661">we</wd>

<space/>

<wd l="8246" t="13555" r="8525" b="13661">use</wd>

<space/>

<wd l="8611" t="13555" r="8702" b="13661">a</wd>

<space/>

<wd l="8789" t="13507" r="9614" b="13661">collection</wd>

<space/>

<wd l="9701" t="13507" r="9883" b="13661">of</wd>

<space/>

<wd l="9950" t="13531" r="10488" b="13661">tweets</wd>

<space/>

</ln>

<ln l="6101" t="13762" r="10493" b="13963" baseLine="13915" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6101" t="13771" r="6432" b="13954">(see</wd>

<space/>

<wd l="6509" t="13771" r="7464" b="13920">“Resources</wd>

<space/>

<wd l="7541" t="13766" r="8496" b="13963">Employed”</wd>

<space/>

<wd l="8587" t="13766" r="9240" b="13954">section)</wd>

<space/>

<wd l="9326" t="13766" r="9624" b="13920">and</wd>

<space/>

<wd l="9706" t="13814" r="9797" b="13920">a</wd>

<space/>

<wd l="9869" t="13766" r="10493" b="13920">derived</wd>

<space/>

</ln>

<ln l="6096" t="14026" r="10488" b="14227" baseLine="14174" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6096" t="14030" r="6979" b="14227">dictionary,</wd>

<space/>

<wd l="7070" t="14030" r="7877" b="14184">described</wd>

<space/>

<wd l="7949" t="14030" r="8746" b="14227">presently.</wd>

<space/>

<wd l="8880" t="14030" r="9206" b="14184">The</wd>

<space/>

<wd l="9288" t="14030" r="10133" b="14227">dictionary</wd>

<space/>

<wd l="10210" t="14030" r="10488" b="14184">has</wd>

<space/>

</ln>

<ln l="6091" t="14290" r="10498" b="14491" baseLine="14438" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6091" t="14294" r="6350" b="14448">the</wd>

<space/>

<wd l="6413" t="14299" r="6970" b="14477">18,000</wd>

<space/>

<wd l="7022" t="14294" r="7733" b="14448">standard</wd>

<space/>

<wd l="7771" t="14294" r="8285" b="14448">words</wd>

<space/>

<wd l="8328" t="14318" r="8746" b="14448">most</wd>

<space/>

<wd l="8784" t="14294" r="9490" b="14491">frequent</wd>

<space/>

<wd l="9528" t="14294" r="9691" b="14443">in</wd>

<space/>

<wd l="9734" t="14294" r="9989" b="14448">the</wd>

<space/>

<wd l="10032" t="14318" r="10498" b="14448">tweet</wd>

<space/>

</ln>

<ln l="6096" t="14554" r="10493" b="14755" baseLine="14702" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6096" t="14558" r="6965" b="14712">collection.</wd>

<space/>

<wd l="7080" t="14563" r="7373" b="14712">For</wd>

<space/>

<wd l="7435" t="14558" r="7690" b="14712">the</wd>

<space/>

<wd l="7762" t="14558" r="8803" b="14712">construction</wd>

<space/>

<wd l="8875" t="14558" r="9058" b="14712">of</wd>

<space/>

<wd l="9110" t="14558" r="9365" b="14712">the</wd>

<space/>

<wd l="9437" t="14558" r="10320" b="14755">dictionary,</wd>

<space/>

<wd l="10402" t="14606" r="10493" b="14712">a</wd>

</ln>

</para>

</column>

</section>

<section l="1411" t="14764" r="10502" b="16480">

<column l="1411" t="14764" r="10502" b="16480">

<para l="5809" t="15787" r="6148" b="15946" alignment="centered" spaceBefore="980" lsp="exactly" lspExact="249" language="en">

<ln l="5875" t="15787" r="6082" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="39">

<wd l="5875" t="15792" r="6082" b="15946">82</wd>

</ln>

</para>

<para l="2918" t="16133" r="8981" b="16469" alignment="centered" spaceBefore="139" lsp="exactly" lspExact="170" language="en">

<ln l="2918" t="16133" r="8981" b="16301" baseLine="16253" forcedEOF="true">

<run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2918" t="16133" r="3802" b="16296">Proceedings</wd>

<space/>

<wd l="3854" t="16133" r="4018" b="16296">of</wd>

<space/>

<wd l="4037" t="16133" r="4248" b="16262">the</wd>

<space/>

<wd l="4286" t="16138" r="4622" b="16262">ACL</wd>

<space/>

<wd l="4666" t="16133" r="5026" b="16262">2015</wd>

<space/>

<wd l="5078" t="16133" r="5779" b="16296">Workshop</wd>

<space/>

<wd l="5832" t="16176" r="6000" b="16262">on</wd>

<space/>

<wd l="6043" t="16138" r="6456" b="16296">Noisy</wd>

<space/>

<wd l="6518" t="16133" r="7627" b="16296">User-generated</wd>

<space/>

</run>

<wd l="7675" t="16138" r="7992" b="16286"><run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Text</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="8045" t="16171" r="8443" b="16301">pages</wd>

<space/>

<wd l="8506" t="16133" r="8981" b="16286">82–86,
</wd>

</run>

</ln>

<ln l="3029" t="16296" r="8870" b="16469" baseLine="16425" forcedEOF="true">

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3029" t="16301" r="3595" b="16469">Beijing,</wd>

<space/>

<wd l="3653" t="16301" r="4114" b="16454">China,</wd>

<space/>

<wd l="4166" t="16301" r="4459" b="16469">July</wd>

<space/>

<wd l="4512" t="16301" r="4723" b="16454">31,</wd>

<space/>

<wd l="4781" t="16301" r="5170" b="16430">2015.</wd>

<space/>

</run>

<wd l="5246" t="16296" r="5770" b="16469"><run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">c</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">�</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">2015</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="5822" t="16301" r="6672" b="16430">Association</wd>

<space/>

<wd l="6715" t="16301" r="6926" b="16430">for</wd>

<space/>

<wd l="6974" t="16301" r="8035" b="16469">Computational</wd>

<space/>

<wd l="8078" t="16301" r="8870" b="16469">Linguistics</wd>

</run>

</ln>

</para>

</column>

</section>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4312.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1399" marginTop="1440" marginRight="1402" marginBottom="1292" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1399" t="2134" r="10507" b="14777">

<column l="1399" t="2134" r="5834" b="14777">

<para l="1411" t="2198" r="5818" b="3710" alignment="justified" lsp="exactly" lspExact="263" language="en">

<ln l="1411" t="2198" r="5818" b="2352" baseLine="2342" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="2198" r="1848" b="2352">word</wd>

<space/>

<wd l="1920" t="2198" r="2059" b="2352">is</wd>

<space/>

<wd l="2146" t="2198" r="3058" b="2352">considered</wd>

<space/>

<wd l="3130" t="2222" r="3293" b="2352">to</wd>

<space/>

<wd l="3370" t="2198" r="3566" b="2352">be</wd>

<space/>

<wd l="3658" t="2198" r="4363" b="2352">standard</wd>

<space/>

<wd l="4440" t="2198" r="4579" b="2347">if</wd>

<space/>

<wd l="4646" t="2198" r="4766" b="2352">it</wd>

<space/>

<wd l="4838" t="2198" r="5117" b="2352">has</wd>

<space/>

<wd l="5203" t="2222" r="5352" b="2352">at</wd>

<space/>

<wd l="5424" t="2198" r="5818" b="2352">least</wd>

<space/>

</ln>

<ln l="1411" t="2462" r="5803" b="2659" baseLine="2606" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="2462" r="1766" b="2616">four</wd>

<space/>

<wd l="1824" t="2462" r="2722" b="2645">characters,</wd>

<space/>

<wd l="2789" t="2462" r="3086" b="2616">and</wd>

<space/>

<wd l="3139" t="2462" r="3274" b="2616">is</wd>

<space/>

<wd l="3336" t="2462" r="3826" b="2616">found</wd>

<space/>

<wd l="3878" t="2462" r="4046" b="2611">in</wd>

<space/>

<wd l="4099" t="2462" r="4354" b="2616">the</wd>

<space/>

<wd l="4411" t="2467" r="4891" b="2616">CMU</wd>

<space/>

<wd l="4944" t="2510" r="5803" b="2659">pronounc-</wd>

</ln>

<ln l="1411" t="2722" r="5803" b="2918" baseLine="2870" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="2722" r="1680" b="2918">ing</wd>

<space/>

<wd l="1752" t="2722" r="2635" b="2918">dictionary,</wd>

<space/>

<wd l="2722" t="2770" r="2928" b="2904">or,</wd>

<space/>

<wd l="3005" t="2722" r="3144" b="2870">if</wd>

<space/>

<wd l="3202" t="2722" r="3322" b="2875">it</wd>

<space/>

<wd l="3384" t="2722" r="3658" b="2875">has</wd>

<space/>

<wd l="3730" t="2722" r="4210" b="2875">fewer</wd>

<space/>

<wd l="4272" t="2722" r="4632" b="2875">than</wd>

<space/>

<wd l="4704" t="2722" r="5122" b="2875">three</wd>

<space/>

<wd l="5194" t="2722" r="5803" b="2875">charac-</wd>

</ln>

<ln l="1411" t="2952" r="5803" b="3182" baseLine="3131">

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1411" t="3010" r="1762" b="3168">ters,</wd>

<space/>

<wd l="1814" t="2986" r="1939" b="3139">it</wd>

<space/>

<wd l="1978" t="2986" r="2112" b="3139">is</wd>

<space/>

<wd l="2160" t="2986" r="2654" b="3139">found</wd>

<space/>

<wd l="2698" t="2986" r="2861" b="3134">in</wd>

<space/>

<wd l="2904" t="2986" r="3163" b="3139">the</wd>

<space/>

<wd l="3206" t="2986" r="3802" b="3182">Norvig</wd>

<space/>

</run>

<wd l="3854" t="2952" r="4786" b="3182"><run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">dictionary</run>

<run underlined="none" subsuperscript="superscript" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">2</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="4843" t="2986" r="5146" b="3139">and</wd>

<space/>

<wd l="5189" t="2986" r="5323" b="3139">is</wd>

<space/>

<wd l="5381" t="2986" r="5803" b="3139">suffi-</wd>

</run>

</ln>

<ln l="1416" t="3250" r="5808" b="3446" baseLine="3394" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="3250" r="1982" b="3446">ciently</wd>

<space/>

<wd l="2030" t="3250" r="2741" b="3446">frequent</wd>

<space/>

<wd l="2789" t="3250" r="3365" b="3437">(where</wd>

<space/>

<wd l="3418" t="3250" r="4195" b="3403">sufficient</wd>

<space/>

<wd l="4234" t="3250" r="5074" b="3446">frequency</wd>

<space/>

<wd l="5126" t="3250" r="5808" b="3446">depends</wd>

<space/>

</ln>

<ln l="1416" t="3514" r="3115" b="3710" baseLine="3658" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="3562" r="1622" b="3667">on</wd>

<space/>

<wd l="1680" t="3514" r="1934" b="3667">the</wd>

<space/>

<wd l="1987" t="3514" r="2424" b="3667">word</wd>

<space/>

<wd l="2477" t="3514" r="3115" b="3710">length).</wd>

</ln>

</para>

<para l="1411" t="3797" r="5818" b="6346" alignment="justified" spaceBefore="9" fli="216" lsp="exactly" lspExact="264" language="en">

<ln l="1622" t="3797" r="5818" b="3994" baseLine="3941" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1622" t="3802" r="2026" b="3950">Now</wd>

<space/>

<wd l="2078" t="3845" r="2318" b="3950">we</wd>

<space/>

<wd l="2366" t="3821" r="2779" b="3950">want</wd>

<space/>

<wd l="2822" t="3821" r="2981" b="3950">to</wd>

<space/>

<wd l="3038" t="3797" r="3365" b="3950">find</wd>

<space/>

<wd l="3408" t="3797" r="3662" b="3950">the</wd>

<space/>

<wd l="3710" t="3821" r="4128" b="3950">most</wd>

<space/>

<wd l="4171" t="3797" r="4877" b="3994">frequent</wd>

<space/>

<wd l="4925" t="3802" r="5443" b="3950">OOVs</wd>

<space/>

<wd l="5496" t="3797" r="5818" b="3950">that</wd>

<space/>

</ln>

<ln l="1411" t="4056" r="5818" b="4210" baseLine="4205" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1411" t="4056" r="1814" b="4210">need</wd>

<space/>

<wd l="1882" t="4080" r="2040" b="4210">to</wd>

<space/>

<wd l="2117" t="4056" r="2314" b="4210">be</wd>

<space/>

<wd l="2381" t="4056" r="3374" b="4210">normalized.</wd>

<space/>

<wd l="3499" t="4061" r="3773" b="4210">We</wd>

<space/>

<wd l="3845" t="4056" r="4560" b="4210">tokenize</wd>

<space/>

<wd l="4632" t="4056" r="4886" b="4210">the</wd>

<space/>

<wd l="4958" t="4056" r="5510" b="4210">twitter</wd>

<space/>

<wd l="5587" t="4080" r="5818" b="4210">set</wd>

<space/>

</ln>

<ln l="1416" t="4320" r="5798" b="4517" baseLine="4464" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="4320" r="1718" b="4474">and</wd>

<space/>

<wd l="1771" t="4320" r="2170" b="4474">filter</wd>

<space/>

<wd l="2222" t="4344" r="2491" b="4474">out</wd>

<space/>

<wd l="2539" t="4320" r="2741" b="4474">all</wd>

<space/>

<wd l="2794" t="4320" r="3341" b="4474">tokens</wd>

<space/>

<wd l="3394" t="4320" r="3715" b="4474">that</wd>

<space/>

<wd l="3763" t="4368" r="4325" b="4517">appear</wd>

<space/>

<wd l="4373" t="4320" r="4536" b="4469">in</wd>

<space/>

<wd l="4589" t="4368" r="4867" b="4474">our</wd>

<space/>

<wd l="4920" t="4320" r="5798" b="4517">dictionary.</wd>

<space/>

</ln>

<ln l="1411" t="4584" r="5808" b="4738" baseLine="4728" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1411" t="4589" r="1690" b="4738">We</wd>

<space/>

<wd l="1771" t="4584" r="2102" b="4738">also</wd>

<space/>

<wd l="2189" t="4584" r="2587" b="4738">filter</wd>

<space/>

<wd l="2669" t="4608" r="2938" b="4738">out</wd>

<space/>

<wd l="3014" t="4584" r="3216" b="4738">all</wd>

<space/>

<wd l="3302" t="4584" r="3845" b="4738">tokens</wd>

<space/>

<wd l="3926" t="4584" r="4248" b="4738">that</wd>

<space/>

<wd l="4330" t="4584" r="4531" b="4738">do</wd>

<space/>

<wd l="4613" t="4608" r="4886" b="4738">not</wd>

<space/>

<wd l="4958" t="4584" r="5477" b="4738">match</wd>

<space/>

<wd l="5554" t="4584" r="5808" b="4738">the</wd>

<space/>

</ln>

<ln l="1411" t="4848" r="5808" b="5045" baseLine="4992" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1411" t="4848" r="1982" b="5002">format</wd>

<space/>

<wd l="2069" t="4848" r="2251" b="5002">of</wd>

<space/>

<wd l="2328" t="4848" r="3427" b="5002">normalizable</wd>

<space/>

<wd l="3518" t="4848" r="4061" b="5002">tokens</wd>

<space/>

<wd l="4162" t="4896" r="4325" b="5002">as</wd>

<space/>

<wd l="4426" t="4848" r="5170" b="5045">specified</wd>

<space/>

<wd l="5256" t="4848" r="5462" b="5045">by</wd>

<space/>

<wd l="5554" t="4848" r="5808" b="5002">the</wd>

<space/>

</ln>

<ln l="1421" t="5107" r="5808" b="5304" baseLine="5256" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="5107" r="1963" b="5261">shared</wd>

<space/>

<wd l="2026" t="5107" r="2371" b="5261">task</wd>

<space/>

<wd l="2434" t="5155" r="2722" b="5304">e.g.</wd>

<space/>

<wd l="2827" t="5107" r="3374" b="5261">tokens</wd>

<space/>

<wd l="3442" t="5107" r="3763" b="5261">that</wd>

<space/>

<wd l="3821" t="5107" r="4210" b="5261">have</wd>

<space/>

<wd l="4272" t="5107" r="5808" b="5304">non-alphanumeric</wd>

<space/>

</ln>

<ln l="1416" t="5371" r="5808" b="5568" baseLine="5515" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="5371" r="2266" b="5525">characters</wd>

<space/>

<wd l="2352" t="5371" r="2784" b="5525">other</wd>

<space/>

<wd l="2861" t="5371" r="3221" b="5525">than</wd>

<space/>

<wd l="3307" t="5419" r="3499" b="5525">an</wd>

<space/>

<wd l="3586" t="5371" r="4762" b="5568">apostrophe(’).</wd>

<space/>

<wd l="4915" t="5371" r="5472" b="5568">Lastly,</wd>

<space/>

<wd l="5568" t="5419" r="5808" b="5525">we</wd>

<space/>

</ln>

<ln l="1416" t="5635" r="5808" b="5832" baseLine="5779" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="5635" r="1814" b="5789">filter</wd>

<space/>

<wd l="1891" t="5659" r="2160" b="5789">out</wd>

<space/>

<wd l="2227" t="5635" r="2674" b="5789">those</wd>

<space/>

<wd l="2750" t="5635" r="3293" b="5789">tokens</wd>

<space/>

<wd l="3370" t="5635" r="3691" b="5789">that</wd>

<space/>

<wd l="3768" t="5635" r="4234" b="5789">could</wd>

<space/>

<wd l="4306" t="5635" r="4502" b="5789">be</wd>

<space/>

<wd l="4574" t="5635" r="5530" b="5789">normalized</wd>

<space/>

<wd l="5602" t="5635" r="5808" b="5832">by</wd>

<space/>

</ln>

<ln l="1416" t="5899" r="5803" b="6096" baseLine="6043" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="5947" r="1694" b="6053">our</wd>

<space/>

<wd l="1771" t="5899" r="2650" b="6053">rule-based</wd>

<space/>

<wd l="2731" t="5923" r="3739" b="6096">components</wd>

<space/>

<wd l="3830" t="5899" r="4699" b="6086">(described</wd>

<space/>

<wd l="4776" t="5899" r="4939" b="6048">in</wd>

<space/>

<wd l="5016" t="5923" r="5381" b="6053">next</wd>

<space/>

<wd l="5458" t="5899" r="5803" b="6053">sub-</wd>

</ln>

<ln l="1421" t="6158" r="2122" b="6346" baseLine="6307" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="6158" r="2122" b="6346">section).</wd>

</ln>

</para>

<para l="1411" t="6442" r="5818" b="9782" alignment="justified" spaceBefore="29" fli="216" lsp="exactly" lspExact="264" language="en">

<ln l="1622" t="6442" r="5808" b="6595" baseLine="6590" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1622" t="6446" r="1901" b="6595">We</wd>

<space/>

<wd l="1958" t="6466" r="2424" b="6595">count</wd>

<space/>

<wd l="2472" t="6442" r="2726" b="6595">the</wd>

<space/>

<wd l="2784" t="6490" r="3787" b="6595">occurrences</wd>

<space/>

<wd l="3845" t="6442" r="4027" b="6595">of</wd>

<space/>

<wd l="4070" t="6442" r="4454" b="6595">each</wd>

<space/>

<wd l="4512" t="6442" r="5808" b="6595">OOV-candidate</wd>

<space/>

</ln>

<ln l="1411" t="6706" r="5808" b="6902" baseLine="6850" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1411" t="6706" r="1882" b="6859">token</wd>

<space/>

<wd l="1925" t="6730" r="2333" b="6902">type,</wd>

<space/>

<wd l="2395" t="6734" r="2707" b="6859">sort</wd>

<space/>

<wd l="2746" t="6706" r="2952" b="6902">by</wd>

<space/>

<wd l="3005" t="6706" r="3259" b="6859">the</wd>

<space/>

<wd l="3307" t="6730" r="3816" b="6888">count,</wd>

<space/>

<wd l="3878" t="6706" r="4176" b="6859">and</wd>

<space/>

<wd l="4224" t="6730" r="4728" b="6859">return</wd>

<space/>

<wd l="4771" t="6706" r="5026" b="6859">the</wd>

<space/>

<wd l="5074" t="6706" r="5808" b="6902">resulting</wd>

<space/>

</ln>

<ln l="1411" t="6970" r="5813" b="7166" baseLine="7114" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1411" t="6970" r="1709" b="7123">list.</wd>

<space/>

<wd l="1853" t="6970" r="2222" b="7123">This</wd>

<space/>

<wd l="2304" t="6994" r="2650" b="7166">puts</wd>

<space/>

<wd l="2731" t="6970" r="2986" b="7123">the</wd>

<space/>

<wd l="3062" t="6994" r="3475" b="7123">most</wd>

<space/>

<wd l="3547" t="6970" r="4061" b="7123">useful</wd>

<space/>

<wd l="4142" t="6970" r="5026" b="7123">candidates</wd>

<space/>

<wd l="5107" t="6970" r="5438" b="7123">first</wd>

<space/>

<wd l="5510" t="6970" r="5813" b="7123">and</wd>

<space/>

</ln>

<ln l="1411" t="7234" r="5803" b="7430" baseLine="7378" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1411" t="7234" r="2131" b="7430">provides</wd>

<space/>

<wd l="2208" t="7234" r="2458" b="7387">for</wd>

<space/>

<wd l="2534" t="7234" r="3221" b="7387">efficient</wd>

<space/>

<wd l="3288" t="7282" r="3566" b="7387">use</wd>

<space/>

<wd l="3643" t="7234" r="3826" b="7387">of</wd>

<space/>

<wd l="3893" t="7234" r="4781" b="7387">annotation</wd>

<space/>

<wd l="4853" t="7234" r="5266" b="7387">time.</wd>

<space/>

<wd l="5405" t="7234" r="5803" b="7387">Suit-</wd>

</ln>

<ln l="1416" t="7493" r="5803" b="7690" baseLine="7642" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1416" t="7493" r="1762" b="7646">able</wd>

<space/>

<wd l="1814" t="7493" r="2923" b="7690">replacements</wd>

<space/>

<wd l="2976" t="7493" r="3576" b="7690">require</wd>

<space/>

<wd l="3624" t="7493" r="4195" b="7646">human</wd>

<space/>

<wd l="4229" t="7493" r="5146" b="7690">judgement</wd>

<space/>

<wd l="5194" t="7493" r="5496" b="7646">and</wd>

<space/>

<wd l="5549" t="7541" r="5803" b="7646">oc-</wd>

</ln>

<ln l="1416" t="7757" r="5808" b="7910" baseLine="7901" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1416" t="7757" r="2107" b="7910">casional</wd>

<space/>

<wd l="2184" t="7757" r="2966" b="7910">reference</wd>

<space/>

<wd l="3048" t="7781" r="3206" b="7910">to</wd>

<space/>

<wd l="3293" t="7757" r="3898" b="7910">outside</wd>

<space/>

<wd l="3984" t="7805" r="4646" b="7910">sources.</wd>

<space/>

<wd l="4795" t="7757" r="5122" b="7910">The</wd>

<space/>

<wd l="5208" t="7757" r="5808" b="7910">outside</wd>

<space/>

</ln>

<ln l="1421" t="8021" r="5808" b="8218" baseLine="8165" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1421" t="8069" r="2040" b="8174">sources</wd>

<space/>

<wd l="2131" t="8069" r="2539" b="8174">were</wd>

<space/>

<wd l="2640" t="8026" r="2866" b="8208">(1)</wd>

<space/>

<wd l="2962" t="8021" r="3485" b="8174">Urban</wd>

<space/>

<wd l="3576" t="8021" r="4507" b="8218">Dictionary,</wd>

<space/>

<wd l="4613" t="8021" r="5131" b="8174">which</wd>

<space/>

<wd l="5213" t="8021" r="5352" b="8174">is</wd>

<space/>

<wd l="5438" t="8069" r="5808" b="8218">very</wd>

<space/>

</ln>

<ln l="1411" t="8285" r="5813" b="8482" baseLine="8429" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1411" t="8285" r="1930" b="8438">useful</wd>

<space/>

<wd l="2021" t="8285" r="2270" b="8438">for</wd>

<space/>

<wd l="2371" t="8285" r="2803" b="8482">slang</wd>

<space/>

<wd l="2904" t="8285" r="3202" b="8438">and</wd>

<space/>

<wd l="3298" t="8333" r="4109" b="8482">acronyms</wd>

<space/>

<wd l="4214" t="8290" r="4445" b="8472">(2)</wd>

<space/>

<wd l="4546" t="8285" r="5184" b="8467">Twitter,</wd>

<space/>

<wd l="5294" t="8285" r="5813" b="8438">which</wd>

<space/>

</ln>

<ln l="1411" t="8544" r="5803" b="8741" baseLine="8693" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1411" t="8544" r="1762" b="8698">tells</wd>

<space/>

<wd l="1848" t="8592" r="2165" b="8741">you</wd>

<space/>

<wd l="2246" t="8544" r="2597" b="8698">how</wd>

<space/>

<wd l="2688" t="8592" r="2779" b="8698">a</wd>

<space/>

<wd l="2856" t="8544" r="3288" b="8698">word</wd>

<space/>

<wd l="3370" t="8544" r="3509" b="8698">is</wd>

<space/>

<wd l="3590" t="8568" r="4008" b="8698">most</wd>

<space/>

<wd l="4090" t="8544" r="4517" b="8698">often</wd>

<space/>

<wd l="4598" t="8544" r="4987" b="8698">used</wd>

<space/>

<wd l="5074" t="8592" r="5280" b="8698">on</wd>

<space/>

<wd l="5362" t="8544" r="5803" b="8698">Twit-</wd>

</ln>

<ln l="1411" t="8808" r="5813" b="9005" baseLine="8952" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1411" t="8832" r="1670" b="8990">ter,</wd>

<space/>

<wd l="1757" t="8808" r="2059" b="8962">and</wd>

<space/>

<wd l="2136" t="8813" r="2366" b="8995">(3)</wd>

<space/>

<wd l="2443" t="8808" r="2698" b="8962">the</wd>

<space/>

<wd l="2770" t="8808" r="3422" b="9005">training</wd>

<space/>

<wd l="3509" t="8832" r="3734" b="8962">set</wd>

<space/>

<wd l="3802" t="8808" r="4550" b="9005">provided</wd>

<space/>

<wd l="4618" t="8808" r="4867" b="8962">for</wd>

<space/>

<wd l="4934" t="8808" r="5189" b="8962">the</wd>

<space/>

<wd l="5270" t="8808" r="5813" b="8962">shared</wd>

<space/>

</ln>

<ln l="1411" t="9072" r="5803" b="9269" baseLine="9216" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1411" t="9072" r="1795" b="9254">task,</wd>

<space/>

<wd l="1882" t="9072" r="2400" b="9226">which</wd>

<space/>

<wd l="2472" t="9072" r="2818" b="9226">tells</wd>

<space/>

<wd l="2899" t="9120" r="3216" b="9269">you</wd>

<space/>

<wd l="3288" t="9072" r="3643" b="9226">how</wd>

<space/>

<wd l="3715" t="9096" r="3878" b="9226">to</wd>

<space/>

<wd l="3955" t="9072" r="4795" b="9226">normalize</wd>

<space/>

<wd l="4872" t="9072" r="5035" b="9221">in</wd>

<space/>

<wd l="5112" t="9072" r="5803" b="9269">ambigu-</wd>

</ln>

<ln l="1416" t="9336" r="5818" b="9533" baseLine="9480" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1416" t="9384" r="1699" b="9490">ous</wd>

<space/>

<wd l="1790" t="9384" r="2222" b="9490">cases</wd>

<space/>

<wd l="2314" t="9341" r="2669" b="9533">(e.g.</wd>

<space/>

<wd l="2822" t="9336" r="3648" b="9533">“laughing</wd>

<space/>

<wd l="3734" t="9360" r="4003" b="9490">out</wd>

<space/>

<wd l="4075" t="9336" r="4536" b="9490">loud”</wd>

<space/>

<wd l="4622" t="9389" r="4752" b="9490">v.</wd>

<space/>

<wd l="4906" t="9336" r="5467" b="9533">“laugh</wd>

<space/>

<wd l="5549" t="9360" r="5818" b="9490">out</wd>

<space/>

</ln>

<ln l="1411" t="9562" r="2083" b="9782" baseLine="9726">

<wd l="1411" t="9562" r="2083" b="9782"><run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">loud”).</run>

<run underlined="none" subsuperscript="superscript" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">2</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><nl orig="true"/>

</run>

</ln>

</para>

<para l="1411" t="9878" r="5813" b="11083" alignment="justified" spaceBefore="20" fli="216" lsp="exactly" lspExact="261" language="en">

<ln l="1622" t="9878" r="5808" b="10061" baseLine="10027" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1622" t="9883" r="1800" b="10027">In</wd>

<space/>

<wd l="1930" t="9878" r="2616" b="10032">addition</wd>

<space/>

<wd l="2741" t="9902" r="2899" b="10032">to</wd>

<space/>

<wd l="3024" t="9878" r="3456" b="10032">these</wd>

<space/>

<wd l="3581" t="9878" r="4690" b="10032">hand-curated</wd>

<space/>

<wd l="4814" t="9878" r="5419" b="10061">entries,</wd>

<space/>

<wd l="5568" t="9926" r="5808" b="10032">we</wd>

<space/>

</ln>

<ln l="1416" t="10142" r="5803" b="10325" baseLine="10286" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="10142" r="1920" b="10296">added</wd>

<space/>

<wd l="2117" t="10142" r="2371" b="10296">the</wd>

<space/>

<wd l="2573" t="10142" r="3202" b="10296">Lexical</wd>

<space/>

<wd l="3398" t="10142" r="4574" b="10296">normalization</wd>

<space/>

<wd l="4776" t="10142" r="5803" b="10325">dictionaries,</wd>

<space/>

</ln>

<ln l="1411" t="10406" r="5813" b="10603" baseLine="10550" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="10406" r="2174" b="10560">UniMelb</wd>

<space/>

<wd l="2290" t="10406" r="2587" b="10560">and</wd>

<space/>

<wd l="2693" t="10406" r="3562" b="10589">UTDallas,</wd>

<space/>

<wd l="3691" t="10406" r="4440" b="10603">provided</wd>

<space/>

<wd l="4546" t="10406" r="4790" b="10560">for</wd>

<space/>

<wd l="4896" t="10406" r="5150" b="10560">the</wd>

<space/>

<wd l="5270" t="10406" r="5813" b="10560">shared</wd>

<space/>

</ln>

<ln l="1411" t="10670" r="5808" b="10867" baseLine="10814" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="10670" r="1790" b="10824">task.</wd>

<space/>

<wd l="1872" t="10675" r="2333" b="10824">From</wd>

<space/>

<wd l="2381" t="10670" r="2813" b="10824">these</wd>

<space/>

<wd l="2870" t="10670" r="3206" b="10824">lists</wd>

<space/>

<wd l="3264" t="10718" r="3504" b="10824">we</wd>

<space/>

<wd l="3562" t="10670" r="3941" b="10824">took</wd>

<space/>

<wd l="3994" t="10670" r="4195" b="10824">all</wd>

<space/>

<wd l="4258" t="10670" r="4810" b="10824">entries</wd>

<space/>

<wd l="4867" t="10694" r="5141" b="10824">not</wd>

<space/>

<wd l="5194" t="10670" r="5808" b="10867">already</wd>

<space/>

</ln>

<ln l="1411" t="10896" r="3490" b="11083" baseLine="11072">

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1411" t="10930" r="1579" b="11078">in</wd>

<space/>

<wd l="1632" t="10930" r="1886" b="11083">the</wd>

<space/>

<wd l="1944" t="10930" r="3048" b="11083">hand-curated</wd>

<space/>

</run>

<wd l="3101" t="10896" r="3490" b="11083"><run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">list.</run>

<run underlined="none" subsuperscript="superscript" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">2</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

<para l="1411" t="11213" r="5818" b="14304" alignment="justified" spaceAfter="179" fli="216" lsp="exactly" lspExact="263" language="en">

<ln l="1622" t="11213" r="5803" b="11410" baseLine="11362" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1622" t="11213" r="2040" b="11366">With</wd>

<space/>

<wd l="2083" t="11213" r="2386" b="11366">this</wd>

<space/>

<wd l="2434" t="11213" r="2928" b="11366">initial</wd>

<space/>

<wd l="2971" t="11213" r="3235" b="11366">list</wd>

<space/>

<wd l="3274" t="11213" r="3437" b="11362">in</wd>

<space/>

<wd l="3485" t="11213" r="3974" b="11410">place,</wd>

<space/>

<wd l="4027" t="11261" r="4272" b="11366">we</wd>

<space/>

<wd l="4320" t="11261" r="4584" b="11366">ran</wd>

<space/>

<wd l="4632" t="11213" r="4757" b="11366">it</wd>

<space/>

<wd l="4800" t="11261" r="5006" b="11366">on</wd>

<space/>

<wd l="5050" t="11213" r="5309" b="11366">the</wd>

<space/>

<wd l="5357" t="11213" r="5803" b="11366">train-</wd>

</ln>

<ln l="1411" t="11477" r="5808" b="11674" baseLine="11621" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1411" t="11477" r="1680" b="11674">ing</wd>

<space/>

<wd l="1752" t="11501" r="1982" b="11630">set</wd>

<space/>

<wd l="2045" t="11477" r="2342" b="11630">and</wd>

<space/>

<wd l="2410" t="11477" r="3154" b="11674">analyzed</wd>

<space/>

<wd l="3216" t="11477" r="3470" b="11630">the</wd>

<space/>

<wd l="3538" t="11525" r="4070" b="11659">errors,</wd>

<space/>

<wd l="4142" t="11477" r="4786" b="11674">looking</wd>

<space/>

<wd l="4858" t="11477" r="5808" b="11674">specifically</wd>

<space/>

</ln>

<ln l="1416" t="11741" r="5808" b="11938" baseLine="11885" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1416" t="11765" r="1570" b="11894">at</wd>

<space/>

<wd l="1627" t="11741" r="2021" b="11894">false</wd>

<space/>

<wd l="2083" t="11741" r="2822" b="11938">positives</wd>

<space/>

<wd l="2894" t="11741" r="3192" b="11894">and</wd>

<space/>

<wd l="3254" t="11741" r="3648" b="11894">false</wd>

<space/>

<wd l="3710" t="11741" r="4536" b="11938">negatives.</wd>

<space/>

<wd l="4642" t="11746" r="4915" b="11894">We</wd>

<space/>

<wd l="4987" t="11741" r="5496" b="11894">sorted</wd>

<space/>

<wd l="5554" t="11741" r="5808" b="11894">the</wd>

<space/>

</ln>

<ln l="1411" t="12000" r="5803" b="12197" baseLine="12149" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1411" t="12000" r="1958" b="12154">tokens</wd>

<space/>

<wd l="2040" t="12000" r="2362" b="12154">that</wd>

<space/>

<wd l="2443" t="12000" r="3014" b="12154">caused</wd>

<space/>

<wd l="3091" t="12000" r="3523" b="12154">these</wd>

<space/>

<wd l="3610" t="12048" r="4090" b="12154">errors</wd>

<space/>

<wd l="4181" t="12000" r="5002" b="12197">according</wd>

<space/>

<wd l="5088" t="12024" r="5246" b="12154">to</wd>

<space/>

<wd l="5333" t="12048" r="5424" b="12154">a</wd>

<space/>

<wd l="5501" t="12000" r="5803" b="12154">for-</wd>

</ln>

<ln l="1411" t="12264" r="5808" b="12461" baseLine="12413" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1411" t="12264" r="1838" b="12418">mula</wd>

<space/>

<wd l="1886" t="12264" r="2208" b="12418">that</wd>

<space/>

<wd l="2261" t="12264" r="3067" b="12418">estimated</wd>

<space/>

<wd l="3120" t="12264" r="3538" b="12418">what</wd>

<space/>

<wd l="3590" t="12264" r="4181" b="12461">change</wd>

<space/>

<wd l="4238" t="12264" r="4762" b="12418">would</wd>

<space/>

<wd l="4819" t="12312" r="5290" b="12418">occur</wd>

<space/>

<wd l="5338" t="12288" r="5496" b="12418">to</wd>

<space/>

<wd l="5554" t="12264" r="5808" b="12418">the</wd>

<space/>

</ln>

<ln l="1411" t="12528" r="5808" b="12682" baseLine="12672" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1411" t="12528" r="1997" b="12682">f-score</wd>

<space/>

<wd l="2069" t="12528" r="2208" b="12677">if</wd>

<space/>

<wd l="2266" t="12528" r="2525" b="12682">the</wd>

<space/>

<wd l="2592" t="12528" r="3062" b="12682">token</wd>

<space/>

<wd l="3130" t="12576" r="3451" b="12682">was</wd>

<space/>

<wd l="3528" t="12552" r="3686" b="12682">to</wd>

<space/>

<wd l="3758" t="12528" r="3955" b="12682">be</wd>

<space/>

<wd l="4027" t="12528" r="4762" b="12682">removed</wd>

<space/>

<wd l="4834" t="12576" r="5006" b="12682">or</wd>

<space/>

<wd l="5078" t="12528" r="5578" b="12682">added</wd>

<space/>

<wd l="5650" t="12552" r="5808" b="12682">to</wd>

<space/>

</ln>

<ln l="1411" t="12792" r="5808" b="12989" baseLine="12936" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1411" t="12792" r="1670" b="12946">the</wd>

<space/>

<wd l="1723" t="12792" r="2021" b="12946">list.</wd>

<space/>

<wd l="2102" t="12792" r="2256" b="12941">If</wd>

<space/>

<wd l="2294" t="12792" r="2554" b="12946">the</wd>

<space/>

<wd l="2606" t="12792" r="3077" b="12946">token</wd>

<space/>

<wd l="3130" t="12792" r="4104" b="12989">represented</wd>

<space/>

<wd l="4162" t="12840" r="4253" b="12946">a</wd>

<space/>

<wd l="4306" t="12792" r="4699" b="12946">false</wd>

<space/>

<wd l="4757" t="12792" r="5506" b="12989">negative,</wd>

<space/>

<wd l="5568" t="12840" r="5808" b="12946">we</wd>

<space/>

</ln>

<ln l="1411" t="13051" r="5818" b="13248" baseLine="13200" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1411" t="13051" r="1939" b="13205">would</wd>

<space/>

<wd l="2011" t="13051" r="2707" b="13205">estimate</wd>

<space/>

<wd l="2774" t="13051" r="3034" b="13205">the</wd>

<space/>

<wd l="3106" t="13051" r="3696" b="13248">change</wd>

<space/>

<wd l="3768" t="13075" r="3926" b="13205">to</wd>

<space/>

<wd l="3998" t="13051" r="4579" b="13205">f-score</wd>

<space/>

<wd l="4651" t="13099" r="4891" b="13205">we</wd>

<space/>

<wd l="4963" t="13051" r="5486" b="13205">would</wd>

<space/>

<wd l="5558" t="13075" r="5818" b="13248">get</wd>

<space/>

</ln>

<ln l="1411" t="13315" r="5803" b="13512" baseLine="13464" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1411" t="13315" r="1618" b="13512">by</wd>

<space/>

<wd l="1704" t="13315" r="2270" b="13512">adding</wd>

<space/>

<wd l="2347" t="13315" r="2467" b="13469">it</wd>

<space/>

<wd l="2534" t="13339" r="2693" b="13469">to</wd>

<space/>

<wd l="2774" t="13315" r="3029" b="13469">the</wd>

<space/>

<wd l="3101" t="13315" r="3403" b="13498">list,</wd>

<space/>

<wd l="3494" t="13315" r="4286" b="13512">assuming</wd>

<space/>

<wd l="4363" t="13315" r="4685" b="13469">that</wd>

<space/>

<wd l="4752" t="13315" r="5006" b="13469">it’s</wd>

<space/>

<wd l="5093" t="13315" r="5803" b="13469">substitu-</wd>

</ln>

<ln l="1411" t="13579" r="5813" b="13776" baseLine="13723" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1411" t="13579" r="1742" b="13733">tion</wd>

<space/>

<wd l="1786" t="13579" r="2314" b="13733">would</wd>

<space/>

<wd l="2362" t="13579" r="2938" b="13776">always</wd>

<space/>

<wd l="2986" t="13579" r="3182" b="13733">be</wd>

<space/>

<wd l="3230" t="13579" r="3485" b="13733">the</wd>

<space/>

<wd l="3533" t="13579" r="3965" b="13733">word</wd>

<space/>

<wd l="4008" t="13603" r="4426" b="13733">most</wd>

<space/>

<wd l="4469" t="13579" r="4901" b="13733">often</wd>

<space/>

<wd l="4949" t="13579" r="5813" b="13733">associated</wd>

<space/>

</ln>

<ln l="1411" t="13843" r="5808" b="14040" baseLine="13987" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1411" t="13843" r="1790" b="13997">with</wd>

<space/>

<wd l="1843" t="13843" r="1963" b="13997">it</wd>

<space/>

<wd l="2011" t="13843" r="2174" b="13992">in</wd>

<space/>

<wd l="2232" t="13843" r="2486" b="13997">the</wd>

<space/>

<wd l="2539" t="13843" r="3192" b="14040">training</wd>

<space/>

<wd l="3259" t="13867" r="3523" b="13997">set.</wd>

<space/>

<wd l="3605" t="13843" r="3758" b="13992">If</wd>

<space/>

<wd l="3797" t="13843" r="3922" b="13997">it</wd>

<space/>

<wd l="3970" t="13891" r="4291" b="13997">was</wd>

<space/>

<wd l="4349" t="13843" r="4742" b="13997">false</wd>

<space/>

<wd l="4800" t="13843" r="5506" b="14040">positive,</wd>

<space/>

<wd l="5568" t="13891" r="5808" b="13997">we</wd>

<space/>

</ln>

<ln l="1411" t="14107" r="5818" b="14304" baseLine="14251" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1411" t="14107" r="1939" b="14261">would</wd>

<space/>

<wd l="2011" t="14107" r="2707" b="14261">estimate</wd>

<space/>

<wd l="2774" t="14107" r="3034" b="14261">the</wd>

<space/>

<wd l="3106" t="14107" r="3696" b="14304">change</wd>

<space/>

<wd l="3768" t="14131" r="3926" b="14261">to</wd>

<space/>

<wd l="3998" t="14107" r="4579" b="14261">f-score</wd>

<space/>

<wd l="4651" t="14155" r="4891" b="14261">we</wd>

<space/>

<wd l="4963" t="14107" r="5486" b="14261">would</wd>

<space/>

<wd l="5558" t="14131" r="5818" b="14304">get</wd>

</ln>

</para>

<rulerline l="1399" t="14496" r="2578" b="14496" type="single" width="10" color="000000"/>

<para l="1661" t="14549" r="4219" b="14746" alignment="left" li="216" spaceBefore="58" spaceAfter="23" lsp="exactly" lspExact="203" language="en">

<ln l="1661" t="14549" r="4219" b="14746" baseLine="14699">

<wd l="1661" t="14549" r="1978" b="14712"><run underlined="none" subsuperscript="superscript" fontSize="600" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">2</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">See</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="2026" t="14587" r="2808" b="14712">“Resources</wd>

<space/>

<wd l="2856" t="14587" r="3643" b="14746">Employed”</wd>

<space/>

<wd l="3696" t="14587" r="4219" b="14712">section.</wd>

</run>

</ln>

</para>

</column>

<column l="6072" t="2134" r="10507" b="14777">

<para l="6091" t="2198" r="8338" b="2395" alignment="justified" lsp="exactly" lspExact="259" language="en">

<ln l="6091" t="2198" r="8338" b="2395" baseLine="2342" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="2198" r="6298" b="2395">by</wd>

<space/>

<wd l="6360" t="2198" r="7032" b="2395">deleting</wd>

<space/>

<wd l="7090" t="2198" r="7214" b="2352">it</wd>

<space/>

<wd l="7262" t="2198" r="7675" b="2352">from</wd>

<space/>

<wd l="7728" t="2198" r="7982" b="2352">the</wd>

<space/>

<wd l="8040" t="2198" r="8338" b="2352">list.</wd>

</ln>

</para>

<para l="6091" t="2462" r="10498" b="5246" alignment="justified" fli="216" lsp="exactly" lspExact="263" language="en">

<ln l="6302" t="2462" r="10478" b="2659" baseLine="2606" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6302" t="2462" r="6677" b="2616">This</wd>

<space/>

<wd l="6730" t="2462" r="7402" b="2659">analysis</wd>

<space/>

<wd l="7450" t="2462" r="8160" b="2616">revealed</wd>

<space/>

<wd l="8213" t="2510" r="8650" b="2616">some</wd>

<space/>

<wd l="8698" t="2462" r="9149" b="2616">weak</wd>

<space/>

<wd l="9197" t="2486" r="9619" b="2659">spots</wd>

<space/>

<wd l="9667" t="2462" r="9830" b="2611">in</wd>

<space/>

<wd l="9878" t="2462" r="10133" b="2616">the</wd>

<space/>

<wd l="10181" t="2462" r="10478" b="2616">list.</wd>

<space/>

</ln>

<ln l="6091" t="2726" r="10493" b="2923" baseLine="2870" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6091" t="2726" r="6523" b="2909">First,</wd>

<space/>

<wd l="6595" t="2726" r="7018" b="2880">there</wd>

<space/>

<wd l="7080" t="2774" r="7488" b="2880">were</wd>

<space/>

<wd l="7560" t="2774" r="7651" b="2880">a</wd>

<space/>

<wd l="7709" t="2726" r="8357" b="2880">number</wd>

<space/>

<wd l="8419" t="2726" r="8602" b="2880">of</wd>

<space/>

<wd l="8654" t="2726" r="9048" b="2880">false</wd>

<space/>

<wd l="9110" t="2726" r="9850" b="2923">positives</wd>

<space/>

<wd l="9922" t="2726" r="10493" b="2880">caused</wd>

<space/>

</ln>

<ln l="6091" t="2990" r="10483" b="3187" baseLine="3134" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6091" t="2990" r="6298" b="3187">by</wd>

<space/>

<wd l="6389" t="2990" r="7114" b="3187">differing</wd>

<space/>

<wd l="7200" t="2990" r="7757" b="3144">beliefs</wd>

<space/>

<wd l="7843" t="2990" r="8645" b="3187">regarding</wd>

<space/>

<wd l="8726" t="2990" r="9144" b="3144">what</wd>

<space/>

<wd l="9221" t="3014" r="9763" b="3144">counts</wd>

<space/>

<wd l="9854" t="3038" r="10018" b="3144">as</wd>

<space/>

<wd l="10104" t="3038" r="10483" b="3144">non-</wd>

</ln>

<ln l="6101" t="3250" r="10488" b="3446" baseLine="3398" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6101" t="3250" r="6850" b="3403">standard.</wd>

<space/>

<wd l="6922" t="3254" r="7214" b="3403">For</wd>

<space/>

<wd l="7258" t="3250" r="8011" b="3446">example,</wd>

<space/>

<wd l="8064" t="3250" r="8486" b="3403">there</wd>

<space/>

<wd l="8534" t="3298" r="8784" b="3403">are</wd>

<space/>

<wd l="8837" t="3250" r="9418" b="3403">several</wd>

<space/>

<wd l="9461" t="3250" r="10488" b="3403">contractions</wd>

<space/>

</ln>

<ln l="6101" t="3514" r="10498" b="3710" baseLine="3658" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6101" t="3518" r="6456" b="3710">(e.g.</wd>

<space/>

<wd l="6614" t="3518" r="7358" b="3710">“gonna”,</wd>

<space/>

<wd l="7454" t="3518" r="8102" b="3710">“gotta”,</wd>

<space/>

<wd l="8203" t="3518" r="8981" b="3696">“wanna”,</wd>

<space/>

<wd l="9077" t="3514" r="9379" b="3667">and</wd>

<space/>

<wd l="9461" t="3514" r="10090" b="3701">“ain’t”)</wd>

<space/>

<wd l="10176" t="3514" r="10498" b="3667">that</wd>

<space/>

</ln>

<ln l="6096" t="3778" r="10493" b="3974" baseLine="3922" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6096" t="3826" r="6350" b="3931">are</wd>

<space/>

<wd l="6427" t="3802" r="6701" b="3931">not</wd>

<space/>

<wd l="6768" t="3778" r="7378" b="3974">usually</wd>

<space/>

<wd l="7459" t="3778" r="8371" b="3931">considered</wd>

<space/>

<wd l="8458" t="3778" r="9163" b="3931">standard</wd>

<space/>

<wd l="9250" t="3778" r="9797" b="3974">(rarely</wd>

<space/>

<wd l="9888" t="3826" r="10253" b="3931">seen</wd>

<space/>

<wd l="10330" t="3778" r="10493" b="3926">in</wd>

<space/>

</ln>

<ln l="6091" t="4042" r="10493" b="4238" baseLine="4186" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6091" t="4042" r="6418" b="4195">The</wd>

<space/>

<wd l="6485" t="4042" r="6878" b="4195">Wall</wd>

<space/>

<wd l="6946" t="4046" r="7435" b="4195">Street</wd>

<space/>

<wd l="7498" t="4042" r="8232" b="4229">Journal),</wd>

<space/>

<wd l="8309" t="4042" r="8611" b="4195">and</wd>

<space/>

<wd l="8674" t="4042" r="9062" b="4195">have</wd>

<space/>

<wd l="9139" t="4042" r="9768" b="4238">straight</wd>

<space/>

<wd l="9826" t="4042" r="10493" b="4195">forward</wd>

<space/>

</ln>

<ln l="6091" t="4301" r="10488" b="4483" baseLine="4450" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6091" t="4301" r="7392" b="4483">normalizations,</wd>

<space/>

<wd l="7498" t="4301" r="7819" b="4454">that</wd>

<space/>

<wd l="7910" t="4349" r="8160" b="4454">are</wd>

<space/>

<wd l="8251" t="4301" r="9230" b="4454">nonetheless</wd>

<space/>

<wd l="9326" t="4301" r="10238" b="4454">considered</wd>

<space/>

<wd l="10330" t="4325" r="10488" b="4454">to</wd>

<space/>

</ln>

<ln l="6091" t="4565" r="10483" b="4762" baseLine="4709" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6091" t="4565" r="6288" b="4718">be</wd>

<space/>

<wd l="6355" t="4565" r="7517" b="4762">in-vocabulary</wd>

<space/>

<wd l="7589" t="4565" r="7752" b="4714">in</wd>

<space/>

<wd l="7814" t="4565" r="8074" b="4718">the</wd>

<space/>

<wd l="8141" t="4565" r="8520" b="4718">task.</wd>

<space/>

<wd l="8635" t="4565" r="9134" b="4718">These</wd>

<space/>

<wd l="9202" t="4565" r="9710" b="4718">words</wd>

<space/>

<wd l="9782" t="4613" r="10190" b="4718">were</wd>

<space/>

<wd l="10258" t="4613" r="10483" b="4718">re-</wd>

</ln>

<ln l="6091" t="4829" r="10488" b="5011" baseLine="4973" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6091" t="4829" r="6662" b="4982">moved</wd>

<space/>

<wd l="6710" t="4829" r="7128" b="4982">from</wd>

<space/>

<wd l="7171" t="4829" r="7426" b="4982">the</wd>

<space/>

<wd l="7488" t="4829" r="8467" b="4982">substitution</wd>

<space/>

<wd l="8515" t="4829" r="8818" b="5011">list,</wd>

<space/>

<wd l="8885" t="4829" r="9182" b="4982">and</wd>

<space/>

<wd l="9240" t="4829" r="9739" b="4982">added</wd>

<space/>

<wd l="9787" t="4853" r="9946" b="4982">to</wd>

<space/>

<wd l="10008" t="4877" r="10099" b="4982">a</wd>

<space/>

<wd l="10147" t="4877" r="10488" b="4982">new</wd>

<space/>

</ln>

<ln l="6091" t="5093" r="8717" b="5246" baseLine="5237" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6091" t="5093" r="6658" b="5246">list—a</wd>

<space/>

<wd l="6715" t="5093" r="8362" b="5246">“do-not-normalize”</wd>

<space/>

<wd l="8419" t="5093" r="8717" b="5246">list.</wd>

</ln>

</para>

<para l="6091" t="5357" r="10502" b="7877" alignment="justified" fli="216" lsp="exactly" lspExact="263" language="en">

<ln l="6302" t="5357" r="10502" b="5539" baseLine="5501" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6302" t="5357" r="7406" b="5539">Furthermore,</wd>

<space/>

<wd l="7507" t="5357" r="7930" b="5510">there</wd>

<space/>

<wd l="8016" t="5405" r="8424" b="5510">were</wd>

<space/>

<wd l="8515" t="5357" r="8698" b="5510">of</wd>

<space/>

<wd l="8774" t="5405" r="9322" b="5510">course</wd>

<space/>

<wd l="9413" t="5405" r="9504" b="5510">a</wd>

<space/>

<wd l="9586" t="5357" r="10234" b="5510">number</wd>

<space/>

<wd l="10320" t="5357" r="10502" b="5510">of</wd>

<space/>

</ln>

<ln l="6091" t="5621" r="10488" b="5818" baseLine="5765" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="5621" r="6485" b="5774">false</wd>

<space/>

<wd l="6571" t="5621" r="7397" b="5818">negatives.</wd>

<space/>

<wd l="7565" t="5626" r="8050" b="5818">Many</wd>

<space/>

<wd l="8141" t="5621" r="8323" b="5774">of</wd>

<space/>

<wd l="8390" t="5621" r="8822" b="5774">these</wd>

<space/>

<wd l="8914" t="5669" r="9365" b="5774">come</wd>

<space/>

<wd l="9451" t="5621" r="9864" b="5774">from</wd>

<space/>

<wd l="9941" t="5621" r="10488" b="5774">tokens</wd>

<space/>

</ln>

<ln l="6091" t="5880" r="10493" b="6077" baseLine="6029" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="5880" r="6413" b="6034">that</wd>

<space/>

<wd l="6466" t="5928" r="6720" b="6034">are</wd>

<space/>

<wd l="6773" t="5880" r="6936" b="6029">in</wd>

<space/>

<wd l="6989" t="5880" r="7248" b="6034">the</wd>

<space/>

<wd l="7306" t="5880" r="8189" b="6077">dictionary,</wd>

<space/>

<wd l="8251" t="5880" r="8525" b="6034">but</wd>

<space/>

<wd l="8573" t="5880" r="8894" b="6034">that</wd>

<space/>

<wd l="8947" t="5928" r="9197" b="6034">are</wd>

<space/>

<wd l="9259" t="5880" r="9686" b="6034">often</wd>

<space/>

<wd l="9739" t="5880" r="10128" b="6034">used</wd>

<space/>

<wd l="10181" t="5880" r="10344" b="6029">in</wd>

<space/>

<wd l="10402" t="5928" r="10493" b="6034">a</wd>

<space/>

</ln>

<ln l="6091" t="6144" r="10483" b="6341" baseLine="6288" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="6144" r="7195" b="6298">non-standard</wd>

<space/>

<wd l="7253" t="6192" r="7598" b="6341">way</wd>

<space/>

<wd l="7661" t="6144" r="7829" b="6293">in</wd>

<space/>

<wd l="7886" t="6144" r="8611" b="6298">informal</wd>

<space/>

<wd l="8678" t="6144" r="9283" b="6341">speech.</wd>

<space/>

<wd l="9379" t="6149" r="9672" b="6298">For</wd>

<space/>

<wd l="9730" t="6144" r="10483" b="6341">example,</wd>

<space/>

</ln>

<ln l="6096" t="6408" r="10483" b="6605" baseLine="6552" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6096" t="6408" r="6542" b="6562">“wit”</wd>

<space/>

<wd l="6619" t="6408" r="6754" b="6562">is</wd>

<space/>

<wd l="6826" t="6408" r="6989" b="6557">in</wd>

<space/>

<wd l="7066" t="6408" r="7776" b="6562">standard</wd>

<space/>

<wd l="7848" t="6408" r="8870" b="6590">dictionaries,</wd>

<space/>

<wd l="8952" t="6408" r="9686" b="6605">referring</wd>

<space/>

<wd l="9758" t="6432" r="9917" b="6562">to</wd>

<space/>

<wd l="9994" t="6456" r="10186" b="6562">an</wd>

<space/>

<wd l="10258" t="6408" r="10483" b="6557">in-</wd>

</ln>

<ln l="6091" t="6672" r="10483" b="6869" baseLine="6816" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="6672" r="6869" b="6826">tellectual</wd>

<space/>

<wd l="6931" t="6672" r="7565" b="6854">feature;</wd>

<space/>

<wd l="7646" t="6672" r="8362" b="6826">however</wd>

<space/>

<wd l="8424" t="6672" r="8544" b="6826">it</wd>

<space/>

<wd l="8611" t="6672" r="9038" b="6826">often</wd>

<space/>

<wd l="9106" t="6720" r="9744" b="6869">appears</wd>

<space/>

<wd l="9811" t="6672" r="9974" b="6821">in</wd>

<space/>

<wd l="10037" t="6672" r="10483" b="6826">Twit-</wd>

</ln>

<ln l="6091" t="6931" r="10493" b="7128" baseLine="7080" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="6955" r="6317" b="7085">ter</wd>

<space/>

<wd l="6379" t="6979" r="6542" b="7085">as</wd>

<space/>

<wd l="6610" t="6979" r="6701" b="7085">a</wd>

<space/>

<wd l="6754" t="6931" r="7862" b="7085">non-standard</wd>

<space/>

<wd l="7915" t="6931" r="8664" b="7085">variation</wd>

<space/>

<wd l="8726" t="6931" r="8909" b="7085">of</wd>

<space/>

<wd l="8957" t="6931" r="9562" b="7114">“with”,</wd>

<space/>

<wd l="9634" t="6979" r="9797" b="7085">as</wd>

<space/>

<wd l="9859" t="6931" r="10022" b="7080">in</wd>

<space/>

<wd l="10085" t="6936" r="10493" b="7128">“you</wd>

<space/>

</ln>

<ln l="6091" t="7195" r="10498" b="7378" baseLine="7339" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="7195" r="6365" b="7349">wit</wd>

<space/>

<wd l="6437" t="7243" r="6696" b="7349">me</wd>

<space/>

<wd l="6773" t="7195" r="7248" b="7349">hea?”</wd>

<space/>

<wd l="7330" t="7195" r="8098" b="7349">Likewise</wd>

<space/>

<wd l="8179" t="7243" r="8386" b="7349">on</wd>

<space/>

<wd l="8462" t="7195" r="9101" b="7378">Twitter,</wd>

<space/>

<wd l="9197" t="7200" r="9845" b="7349">“cause”</wd>

<space/>

<wd l="9931" t="7195" r="10498" b="7349">almost</wd>

<space/>

</ln>

<ln l="6096" t="7459" r="10493" b="7656" baseLine="7603" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6096" t="7459" r="6672" b="7656">always</wd>

<space/>

<wd l="6749" t="7507" r="7282" b="7613">means</wd>

<space/>

<wd l="7363" t="7459" r="8256" b="7613">“because”.</wd>

<space/>

<wd l="8395" t="7459" r="8813" b="7613">Such</wd>

<space/>

<wd l="8885" t="7459" r="9427" b="7613">tokens</wd>

<space/>

<wd l="9504" t="7507" r="9912" b="7613">were</wd>

<space/>

<wd l="9994" t="7459" r="10493" b="7613">added</wd>

<space/>

</ln>

<ln l="6091" t="7723" r="7958" b="7877" baseLine="7867" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="7747" r="6254" b="7877">to</wd>

<space/>

<wd l="6312" t="7723" r="6566" b="7877">the</wd>

<space/>

<wd l="6629" t="7723" r="7608" b="7877">substitution</wd>

<space/>

<wd l="7661" t="7723" r="7958" b="7877">list.</wd>

</ln>

</para>

<para l="6072" t="7987" r="10493" b="9758" alignment="justified" fli="216" lsp="exactly" lspExact="263" language="en">

<ln l="6302" t="7987" r="10493" b="8184" baseLine="8131" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6302" t="7992" r="6437" b="8141">It</wd>

<space/>

<wd l="6485" t="7987" r="6984" b="8184">might</wd>

<space/>

<wd l="7037" t="7987" r="7234" b="8141">be</wd>

<space/>

<wd l="7296" t="7987" r="8078" b="8184">supposed</wd>

<space/>

<wd l="8131" t="7987" r="8453" b="8141">that</wd>

<space/>

<wd l="8501" t="7987" r="8957" b="8184">using</wd>

<space/>

<wd l="9014" t="7987" r="9269" b="8141">the</wd>

<space/>

<wd l="9326" t="7987" r="9979" b="8184">training</wd>

<space/>

<wd l="10046" t="8011" r="10277" b="8141">set</wd>

<space/>

<wd l="10330" t="7987" r="10493" b="8136">in</wd>

<space/>

</ln>

<ln l="6091" t="8251" r="10493" b="8448" baseLine="8395" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6091" t="8251" r="6394" b="8405">this</wd>

<space/>

<wd l="6461" t="8299" r="6806" b="8448">way</wd>

<space/>

<wd l="6883" t="8251" r="7349" b="8405">could</wd>

<space/>

<wd l="7411" t="8251" r="7766" b="8405">lead</wd>

<space/>

<wd l="7829" t="8275" r="7987" b="8405">to</wd>

<space/>

<wd l="8069" t="8299" r="8587" b="8405">severe</wd>

<space/>

<wd l="8659" t="8251" r="9634" b="8448">over-fitting.</wd>

<space/>

<wd l="9749" t="8256" r="9960" b="8405">To</wd>

<space/>

<wd l="10037" t="8251" r="10493" b="8405">avoid</wd>

<space/>

</ln>

<ln l="6091" t="8510" r="10483" b="8707" baseLine="8659" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6091" t="8510" r="6442" b="8693">this,</wd>

<space/>

<wd l="6509" t="8558" r="6754" b="8664">we</wd>

<space/>

<wd l="6816" t="8510" r="7320" b="8664">didn’t</wd>

<space/>

<wd l="7373" t="8510" r="7824" b="8664">make</wd>

<space/>

<wd l="7891" t="8558" r="8179" b="8707">any</wd>

<space/>

<wd l="8246" t="8510" r="9250" b="8707">adjustments</wd>

<space/>

<wd l="9312" t="8510" r="9562" b="8664">for</wd>

<space/>

<wd l="9614" t="8510" r="10162" b="8664">tokens</wd>

<space/>

<wd l="10229" t="8558" r="10483" b="8707">ap-</wd>

</ln>

<ln l="6091" t="8774" r="10488" b="8971" baseLine="8918" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6091" t="8774" r="6720" b="8971">pearing</wd>

<space/>

<wd l="6782" t="8774" r="7094" b="8928">less</wd>

<space/>

<wd l="7152" t="8774" r="7512" b="8928">than</wd>

<space/>

<wd l="7574" t="8774" r="7992" b="8928">three</wd>

<space/>

<wd l="8050" t="8774" r="8501" b="8928">times</wd>

<space/>

<wd l="8568" t="8822" r="8736" b="8928">as</wd>

<space/>

<wd l="8798" t="8822" r="8890" b="8928">a</wd>

<space/>

<wd l="8942" t="8774" r="9336" b="8928">false</wd>

<space/>

<wd l="9394" t="8774" r="10099" b="8971">positive,</wd>

<space/>

<wd l="10162" t="8798" r="10488" b="8928">true</wd>

<space/>

</ln>

<ln l="6091" t="9038" r="10488" b="9235" baseLine="9182" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6091" t="9038" r="6797" b="9235">positive,</wd>

<space/>

<wd l="6864" t="9086" r="7037" b="9192">or</wd>

<space/>

<wd l="7085" t="9038" r="7478" b="9192">false</wd>

<space/>

<wd l="7531" t="9038" r="8270" b="9235">negative.</wd>

<space/>

<wd l="8352" t="9038" r="8678" b="9192">The</wd>

<space/>

<wd l="8731" t="9038" r="9278" b="9192">results</wd>

<space/>

<wd l="9341" t="9038" r="9768" b="9192">show</wd>

<space/>

<wd l="9821" t="9038" r="10142" b="9192">that</wd>

<space/>

<wd l="10195" t="9086" r="10488" b="9235">any</wd>

<space/>

</ln>

<ln l="6096" t="9302" r="10488" b="9499" baseLine="9446" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6096" t="9302" r="7027" b="9499">over-fitting</wd>

<space/>

<wd l="7080" t="9350" r="7402" b="9456">was</wd>

<space/>

<wd l="7454" t="9326" r="7728" b="9456">not</wd>

<space/>

<wd l="7781" t="9350" r="8347" b="9485">severe,</wd>

<space/>

<wd l="8414" t="9302" r="8837" b="9456">since</wd>

<space/>

<wd l="8885" t="9302" r="9139" b="9456">the</wd>

<space/>

<wd l="9192" t="9326" r="9490" b="9456">test</wd>

<space/>

<wd l="9533" t="9302" r="10118" b="9456">f-score</wd>

<space/>

<wd l="10166" t="9350" r="10488" b="9456">was</wd>

<space/>

</ln>

<ln l="6072" t="9562" r="9734" b="9758" baseLine="9710" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6072" t="9562" r="6403" b="9758">just</wd>

<space/>

<wd l="6456" t="9610" r="6754" b="9715">one</wd>

<space/>

<wd l="6811" t="9562" r="7248" b="9758">point</wd>

<space/>

<wd l="7296" t="9562" r="7608" b="9715">less</wd>

<space/>

<wd l="7670" t="9562" r="8026" b="9715">than</wd>

<space/>

<wd l="8083" t="9562" r="8342" b="9715">the</wd>

<space/>

<wd l="8395" t="9562" r="9048" b="9758">training</wd>

<space/>

<wd l="9110" t="9562" r="9734" b="9715">f-score.</wd>

</ln>

</para>

<para l="6091" t="10003" r="8707" b="10200" alignment="left" spaceBefore="198" lsp="exactly" lspExact="245" language="en">

<ln l="6091" t="10003" r="8707" b="10200" baseLine="10152" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="2">

<wd l="6091" t="10003" r="6355" b="10157">2.2</wd>

<space/>

<wd l="6571" t="10008" r="7579" b="10157">Rule-based</wd>

<space/>

<wd l="7637" t="10018" r="8707" b="10200">components</wd>

</ln>

</para>

<para l="6091" t="10349" r="10488" b="11597" alignment="justified" spaceBefore="71" lsp="exactly" lspExact="263" language="en">

<ln l="6091" t="10349" r="10483" b="10546" baseLine="10493" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6091" t="10354" r="6365" b="10502">We</wd>

<space/>

<wd l="6432" t="10349" r="6763" b="10502">also</wd>

<space/>

<wd l="6830" t="10349" r="7978" b="10546">experimented</wd>

<space/>

<wd l="8035" t="10349" r="8410" b="10502">with</wd>

<space/>

<wd l="8477" t="10349" r="9058" b="10502">several</wd>

<space/>

<wd l="9120" t="10349" r="9998" b="10502">rule-based</wd>

<space/>

<wd l="10061" t="10397" r="10483" b="10502">com-</wd>

</ln>

<ln l="6091" t="10608" r="10488" b="10805" baseLine="10757" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6091" t="10632" r="6792" b="10805">ponents,</wd>

<space/>

<wd l="6854" t="10632" r="7162" b="10762">two</wd>

<space/>

<wd l="7224" t="10608" r="7406" b="10762">of</wd>

<space/>

<wd l="7445" t="10608" r="8174" b="10762">which—</wd>

<space/>

<wd l="8222" t="10608" r="8885" b="10762">because</wd>

<space/>

<wd l="8938" t="10608" r="9293" b="10805">they</wd>

<space/>

<wd l="9355" t="10608" r="9970" b="10805">applied</wd>

<space/>

<wd l="10022" t="10608" r="10186" b="10757">in</wd>

<space/>

<wd l="10234" t="10608" r="10488" b="10762">the</wd>

<space/>

</ln>

<ln l="6096" t="10872" r="10488" b="11069" baseLine="11016" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6096" t="10896" r="6754" b="11069">greatest</wd>

<space/>

<wd l="6826" t="10872" r="7474" b="11026">number</wd>

<space/>

<wd l="7555" t="10872" r="7738" b="11026">of</wd>

<space/>

<wd l="7805" t="10920" r="8242" b="11026">cases</wd>

<space/>

<wd l="8323" t="10872" r="8486" b="11021">in</wd>

<space/>

<wd l="8563" t="10872" r="8822" b="11026">the</wd>

<space/>

<wd l="8899" t="10872" r="9552" b="11069">training</wd>

<space/>

<wd l="9643" t="10896" r="10488" b="11026">set—were</wd>

<space/>

</ln>

<ln l="6091" t="11136" r="10488" b="11333" baseLine="11280" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6091" t="11136" r="6480" b="11290">used</wd>

<space/>

<wd l="6562" t="11136" r="6725" b="11285">in</wd>

<space/>

<wd l="6806" t="11136" r="7061" b="11290">the</wd>

<space/>

<wd l="7152" t="11136" r="7522" b="11290">final</wd>

<space/>

<wd l="7613" t="11160" r="8227" b="11333">system.</wd>

<space/>

<wd l="8390" t="11136" r="8894" b="11290">These</wd>

<space/>

<wd l="8981" t="11160" r="9994" b="11333">components</wd>

<space/>

<wd l="10080" t="11184" r="10488" b="11290">were</wd>

<space/>

</ln>

<ln l="6091" t="11400" r="9048" b="11597" baseLine="11544" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6091" t="11400" r="6350" b="11554">the</wd>

<space/>

<wd l="6408" t="11400" r="6854" b="11597">“ing”</wd>

<space/>

<wd l="6917" t="11400" r="7243" b="11554">rule</wd>

<space/>

<wd l="7301" t="11400" r="7603" b="11554">and</wd>

<space/>

<wd l="7656" t="11400" r="7910" b="11554">the</wd>

<space/>

<wd l="7973" t="11400" r="8616" b="11554">“coool”</wd>

<space/>

<wd l="8678" t="11400" r="9048" b="11554">rule.</wd>

</ln>

</para>

<para l="6091" t="11664" r="10498" b="14486" alignment="justified" fli="216" lsp="exactly" lspExact="263" language="en">

<ln l="6302" t="11664" r="10483" b="11861" baseLine="11808" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6302" t="11664" r="6629" b="11818">The</wd>

<space/>

<wd l="6710" t="11664" r="7157" b="11861">“ing”</wd>

<space/>

<wd l="7234" t="11664" r="7560" b="11818">rule</wd>

<space/>

<wd l="7637" t="11664" r="8088" b="11818">looks</wd>

<space/>

<wd l="8165" t="11664" r="8414" b="11818">for</wd>

<space/>

<wd l="8491" t="11712" r="8928" b="11818">cases</wd>

<space/>

<wd l="9005" t="11664" r="9168" b="11813">in</wd>

<space/>

<wd l="9240" t="11664" r="9758" b="11818">which</wd>

<space/>

<wd l="9830" t="11664" r="10085" b="11818">the</wd>

<space/>

<wd l="10157" t="11712" r="10483" b="11818">ver-</wd>

</ln>

<ln l="6091" t="11928" r="10483" b="12125" baseLine="12072" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="11928" r="6350" b="12082">bal</wd>

<space/>

<wd l="6408" t="11928" r="6874" b="12082">suffix</wd>

<space/>

<wd l="6926" t="11928" r="7445" b="12125">“-ing”</wd>

<space/>

<wd l="7498" t="11928" r="7632" b="12082">is</wd>

<space/>

<wd l="7690" t="11928" r="8261" b="12082">altered</wd>

<space/>

<wd l="8309" t="11952" r="8467" b="12082">to</wd>

<space/>

<wd l="8525" t="11976" r="8717" b="12082">an</wd>

<space/>

<wd l="8774" t="11928" r="9235" b="12110">“-in”,</wd>

<space/>

<wd l="9298" t="11933" r="9792" b="12110">“-en”,</wd>

<space/>

<wd l="9854" t="11976" r="10027" b="12082">or</wd>

<space/>

<wd l="10080" t="11933" r="10483" b="12110">“-n”,</wd>

<space/>

</ln>

<ln l="6101" t="12187" r="10498" b="12384" baseLine="12336" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6101" t="12187" r="6480" b="12341">such</wd>

<space/>

<wd l="6533" t="12235" r="6696" b="12341">as</wd>

<space/>

<wd l="6749" t="12187" r="7205" b="12341">when</wd>

<space/>

<wd l="7258" t="12187" r="8050" b="12384">“busting”</wd>

<space/>

<wd l="8102" t="12187" r="8842" b="12341">becomes</wd>

<space/>

<wd l="8894" t="12187" r="9629" b="12341">“bustin”.</wd>

<space/>

<wd l="9706" t="12187" r="9859" b="12336">If</wd>

<space/>

<wd l="9893" t="12187" r="10152" b="12341">the</wd>

<space/>

<wd l="10200" t="12211" r="10498" b="12341">test</wd>

<space/>

</ln>

<ln l="6091" t="12451" r="10488" b="12648" baseLine="12595" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="12451" r="6562" b="12605">token</wd>

<space/>

<wd l="6629" t="12451" r="6763" b="12605">is</wd>

<space/>

<wd l="6835" t="12451" r="6998" b="12600">in</wd>

<space/>

<wd l="7066" t="12451" r="7325" b="12605">the</wd>

<space/>

<wd l="7397" t="12451" r="8280" b="12648">dictionary,</wd>

<space/>

<wd l="8362" t="12451" r="8616" b="12605">the</wd>

<space/>

<wd l="8693" t="12475" r="9629" b="12648">component</wd>

<space/>

<wd l="9696" t="12475" r="10488" b="12648">generates</wd>

<space/>

</ln>

<ln l="6091" t="12715" r="10483" b="12912" baseLine="12859" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="12763" r="6298" b="12869">no</wd>

<space/>

<wd l="6374" t="12715" r="7301" b="12869">candidates.</wd>

<space/>

<wd l="7426" t="12715" r="7574" b="12864">If</wd>

<space/>

<wd l="7632" t="12715" r="7886" b="12869">the</wd>

<space/>

<wd l="7958" t="12715" r="8424" b="12869">token</wd>

<space/>

<wd l="8491" t="12715" r="8630" b="12869">is</wd>

<space/>

<wd l="8702" t="12739" r="8976" b="12869">not</wd>

<space/>

<wd l="9038" t="12715" r="9202" b="12864">in</wd>

<space/>

<wd l="9269" t="12715" r="9523" b="12869">the</wd>

<space/>

<wd l="9600" t="12715" r="10483" b="12912">dictionary,</wd>

<space/>

</ln>

<ln l="6091" t="12979" r="10483" b="13176" baseLine="13123" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="12979" r="6350" b="13133">the</wd>

<space/>

<wd l="6427" t="13003" r="7368" b="13176">component</wd>

<space/>

<wd l="7435" t="12979" r="8002" b="13133">checks</wd>

<space/>

<wd l="8078" t="12979" r="8218" b="13128">if</wd>

<space/>

<wd l="8280" t="12979" r="8534" b="13133">the</wd>

<space/>

<wd l="8606" t="12979" r="9038" b="13133">word</wd>

<space/>

<wd l="9115" t="12979" r="9494" b="13133">ends</wd>

<space/>

<wd l="9571" t="12979" r="9946" b="13133">with</wd>

<space/>

<wd l="10022" t="12979" r="10483" b="13162">“-in”,</wd>

<space/>

</ln>

<ln l="6096" t="13238" r="10493" b="13435" baseLine="13387" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6096" t="13243" r="6595" b="13421">“-en”,</wd>

<space/>

<wd l="6667" t="13286" r="6840" b="13392">or</wd>

<space/>

<wd l="6898" t="13243" r="7248" b="13387">“-n”</wd>

<space/>

<wd l="7310" t="13238" r="8184" b="13435">proceeded</wd>

<space/>

<wd l="8237" t="13238" r="8443" b="13435">by</wd>

<space/>

<wd l="8510" t="13238" r="9082" b="13392">certain</wd>

<space/>

<wd l="9144" t="13262" r="10118" b="13421">consonants,</wd>

<space/>

<wd l="10190" t="13238" r="10493" b="13392">and</wd>

<space/>

</ln>

<ln l="6091" t="13502" r="10483" b="13699" baseLine="13646" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="13502" r="6235" b="13651">if</wd>

<space/>

<wd l="6307" t="13550" r="6528" b="13685">so,</wd>

<space/>

<wd l="6619" t="13502" r="7186" b="13656">checks</wd>

<space/>

<wd l="7267" t="13502" r="7517" b="13656">for</wd>

<space/>

<wd l="7589" t="13502" r="7843" b="13656">the</wd>

<space/>

<wd l="7920" t="13502" r="8774" b="13656">likelihood</wd>

<space/>

<wd l="8856" t="13502" r="9038" b="13656">of</wd>

<space/>

<wd l="9106" t="13502" r="9946" b="13656">additional</wd>

<space/>

<wd l="10032" t="13502" r="10483" b="13699">sylla-</wd>

</ln>

<ln l="6091" t="13766" r="10483" b="13963" baseLine="13910" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="13766" r="6470" b="13920">bles.</wd>

<space/>

<wd l="6600" t="13766" r="6754" b="13915">If</wd>

<space/>

<wd l="6811" t="13766" r="7253" b="13920">those</wd>

<space/>

<wd l="7330" t="13766" r="8203" b="13920">conditions</wd>

<space/>

<wd l="8280" t="13766" r="8698" b="13949">hold,</wd>

<space/>

<wd l="8779" t="13766" r="8899" b="13920">it</wd>

<space/>

<wd l="8966" t="13766" r="9653" b="13963">replaces</wd>

<space/>

<wd l="9730" t="13766" r="9984" b="13920">the</wd>

<space/>

<wd l="10056" t="13766" r="10483" b="13920">iden-</wd>

</ln>

<ln l="6091" t="14030" r="10488" b="14227" baseLine="14174" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="14030" r="6528" b="14184">tified</wd>

<space/>

<wd l="6610" t="14030" r="7176" b="14227">ending</wd>

<space/>

<wd l="7258" t="14030" r="7632" b="14184">with</wd>

<space/>

<wd l="7714" t="14030" r="8213" b="14227">“ing”,</wd>

<space/>

<wd l="8309" t="14030" r="8611" b="14184">and</wd>

<space/>

<wd l="8688" t="14030" r="8827" b="14179">if</wd>

<space/>

<wd l="8894" t="14030" r="9149" b="14184">the</wd>

<space/>

<wd l="9226" t="14030" r="9701" b="14184">result</wd>

<space/>

<wd l="9773" t="14030" r="9912" b="14184">is</wd>

<space/>

<wd l="9994" t="14030" r="10157" b="14179">in</wd>

<space/>

<wd l="10234" t="14030" r="10488" b="14184">the</wd>

<space/>

</ln>

<ln l="6096" t="14290" r="9005" b="14486" baseLine="14438" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6096" t="14290" r="6979" b="14486">dictionary,</wd>

<space/>

<wd l="7042" t="14290" r="7162" b="14443">it</wd>

<space/>

<wd l="7214" t="14290" r="7949" b="14443">becomes</wd>

<space/>

<wd l="8011" t="14338" r="8102" b="14443">a</wd>

<space/>

<wd l="8160" t="14290" r="9005" b="14443">candidate.</wd>

</ln>

</para>

<para l="6302" t="14558" r="10483" b="14755" alignment="justified" li="216" spaceAfter="16" lsp="exactly" lspExact="264" language="en">

<ln l="6302" t="14558" r="10483" b="14755" baseLine="14702" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6302" t="14558" r="6629" b="14712">The</wd>

<space/>

<wd l="6706" t="14558" r="7349" b="14712">“coool”</wd>

<space/>

<wd l="7421" t="14558" r="7747" b="14712">rule</wd>

<space/>

<wd l="7824" t="14582" r="8530" b="14755">attempts</wd>

<space/>

<wd l="8602" t="14582" r="8760" b="14712">to</wd>

<space/>

<wd l="8832" t="14558" r="9672" b="14712">normalize</wd>

<space/>

<wd l="9744" t="14582" r="10061" b="14712">text</wd>

<space/>

<wd l="10123" t="14558" r="10483" b="14741">that,</wd>

</ln>

</para>

</column>

</section>

<dd l="5776" t="15746" r="6171" b="15975">

<para l="5809" t="15792" r="6138" b="15946" alignment="left" lsp="exactly" lspExact="223" language="en">

<ln l="5875" t="15792" r="6072" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="950" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="26">

<wd l="5875" t="15792" r="6072" b="15946">83</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4312.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1405" marginTop="1440" marginRight="1396" marginBottom="1292" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1405" t="2141" r="10513" b="14764">

<column l="1405" t="2141" r="5840" b="14764">

<para l="1411" t="2198" r="5818" b="5549" alignment="justified" lsp="exactly" lspExact="262" language="en">

<ln l="1411" t="2198" r="5808" b="2395" baseLine="2342" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="2198" r="1661" b="2352">for</wd>

<space/>

<wd l="1747" t="2198" r="2573" b="2395">emphasis,</wd>

<space/>

<wd l="2678" t="2222" r="3269" b="2395">repeats</wd>

<space/>

<wd l="3365" t="2198" r="4262" b="2381">characters,</wd>

<space/>

<wd l="4368" t="2246" r="4531" b="2352">as</wd>

<space/>

<wd l="4622" t="2198" r="4786" b="2347">in</wd>

<space/>

<wd l="4877" t="2198" r="5510" b="2352">“Thaatt</wd>

<space/>

<wd l="5587" t="2198" r="5808" b="2352">iss</wd>

<space/>

</ln>

<ln l="1411" t="2462" r="5808" b="2659" baseLine="2606" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="2462" r="2314" b="2659">reallyyyyy</wd>

<space/>

<wd l="2429" t="2467" r="3216" b="2616">neeeeat!”</wd>

<space/>

<wd l="3336" t="2467" r="3547" b="2616">To</wd>

<space/>

<wd l="3667" t="2486" r="4373" b="2659">generate</wd>

<space/>

<wd l="4493" t="2462" r="5424" b="2645">candidates,</wd>

<space/>

<wd l="5554" t="2462" r="5808" b="2616">the</wd>

<space/>

</ln>

<ln l="1416" t="2722" r="5803" b="2918" baseLine="2870" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="2722" r="2064" b="2875">“coool”</wd>

<space/>

<wd l="2150" t="2722" r="2477" b="2875">rule</wd>

<space/>

<wd l="2568" t="2722" r="2971" b="2875">finds</wd>

<space/>

<wd l="3062" t="2770" r="3514" b="2918">every</wd>

<space/>

<wd l="3600" t="2770" r="3878" b="2875">run</wd>

<space/>

<wd l="3965" t="2722" r="4147" b="2875">of</wd>

<space/>

<wd l="4219" t="2770" r="4651" b="2875">more</wd>

<space/>

<wd l="4738" t="2722" r="5098" b="2875">than</wd>

<space/>

<wd l="5179" t="2746" r="5491" b="2875">two</wd>

<space/>

<wd l="5578" t="2770" r="5803" b="2875">re-</wd>

</ln>

<ln l="1411" t="2986" r="5813" b="3182" baseLine="3134" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="2986" r="1963" b="3182">peated</wd>

<space/>

<wd l="2035" t="2986" r="2885" b="3139">characters</wd>

<space/>

<wd l="2962" t="2986" r="3264" b="3139">and</wd>

<space/>

<wd l="3331" t="2986" r="3970" b="3139">reduces</wd>

<space/>

<wd l="4042" t="2986" r="4296" b="3139">the</wd>

<space/>

<wd l="4368" t="2986" r="4896" b="3182">length</wd>

<space/>

<wd l="4968" t="2986" r="5150" b="3139">of</wd>

<space/>

<wd l="5208" t="2986" r="5462" b="3139">the</wd>

<space/>

<wd l="5530" t="3034" r="5813" b="3139">run</wd>

<space/>

</ln>

<ln l="1411" t="3250" r="5818" b="3446" baseLine="3394" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="3274" r="1574" b="3403">to</wd>

<space/>

<wd l="1637" t="3274" r="1987" b="3403">two.</wd>

<space/>

<wd l="2083" t="3254" r="2376" b="3403">For</wd>

<space/>

<wd l="2434" t="3298" r="2885" b="3446">every</wd>

<space/>

<wd l="2952" t="3298" r="3250" b="3403">one</wd>

<space/>

<wd l="3317" t="3250" r="3499" b="3403">of</wd>

<space/>

<wd l="3542" t="3250" r="3974" b="3403">these</wd>

<space/>

<wd l="4037" t="3298" r="4445" b="3432">runs,</wd>

<space/>

<wd l="4512" t="3298" r="4757" b="3403">we</wd>

<space/>

<wd l="4819" t="3298" r="5434" b="3403">assume</wd>

<space/>

<wd l="5496" t="3250" r="5818" b="3403">that</wd>

<space/>

</ln>

<ln l="1411" t="3514" r="5813" b="3710" baseLine="3658" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="3514" r="1670" b="3667">the</wd>

<space/>

<wd l="1723" t="3514" r="2376" b="3710">original</wd>

<space/>

<wd l="2424" t="3514" r="2726" b="3667">had</wd>

<space/>

<wd l="2779" t="3514" r="3259" b="3667">either</wd>

<space/>

<wd l="3307" t="3562" r="3605" b="3667">one</wd>

<space/>

<wd l="3662" t="3562" r="3835" b="3667">or</wd>

<space/>

<wd l="3878" t="3538" r="4190" b="3667">two</wd>

<space/>

<wd l="4243" t="3514" r="4426" b="3667">of</wd>

<space/>

<wd l="4464" t="3514" r="4786" b="3667">that</wd>

<space/>

<wd l="4834" t="3514" r="5602" b="3667">character</wd>

<space/>

<wd l="5650" t="3514" r="5813" b="3662">in</wd>

<space/>

</ln>

<ln l="1411" t="3773" r="5803" b="3970" baseLine="3922" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="3773" r="1733" b="3926">that</wd>

<space/>

<wd l="1786" t="3773" r="2270" b="3970">place.</wd>

<space/>

<wd l="2357" t="3778" r="2630" b="3926">We</wd>

<space/>

<wd l="2693" t="3773" r="3403" b="3926">consider</wd>

<space/>

<wd l="3461" t="3821" r="3912" b="3970">every</wd>

<space/>

<wd l="3984" t="3773" r="4450" b="3970">string</wd>

<space/>

<wd l="4512" t="3773" r="4829" b="3926">that</wd>

<space/>

<wd l="4886" t="3821" r="5170" b="3926">can</wd>

<space/>

<wd l="5232" t="3773" r="5429" b="3926">be</wd>

<space/>

<wd l="5491" t="3821" r="5803" b="3926">cre-</wd>

</ln>

<ln l="1416" t="4037" r="5808" b="4234" baseLine="4186" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="4037" r="1766" b="4190">ated</wd>

<space/>

<wd l="1829" t="4037" r="2035" b="4234">by</wd>

<space/>

<wd l="2107" t="4037" r="2842" b="4234">reducing</wd>

<space/>

<wd l="2914" t="4085" r="3005" b="4190">a</wd>

<space/>

<wd l="3077" t="4037" r="3600" b="4190">subset</wd>

<space/>

<wd l="3667" t="4037" r="3850" b="4190">of</wd>

<space/>

<wd l="3902" t="4037" r="4157" b="4190">the</wd>

<space/>

<wd l="4224" t="4037" r="5386" b="4190">two-character</wd>

<space/>

<wd l="5448" t="4085" r="5808" b="4190">runs</wd>

<space/>

</ln>

<ln l="1411" t="4301" r="5808" b="4498" baseLine="4445" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="4325" r="1574" b="4454">to</wd>

<space/>

<wd l="1646" t="4349" r="1944" b="4454">one</wd>

<space/>

<wd l="2016" t="4301" r="2789" b="4454">character</wd>

<space/>

<wd l="2856" t="4301" r="3283" b="4483">each,</wd>

<space/>

<wd l="3365" t="4301" r="3662" b="4454">and</wd>

<space/>

<wd l="3730" t="4325" r="4234" b="4454">return</wd>

<space/>

<wd l="4301" t="4301" r="4670" b="4498">only</wd>

<space/>

<wd l="4738" t="4301" r="5184" b="4454">those</wd>

<space/>

<wd l="5256" t="4301" r="5808" b="4498">strings</wd>

<space/>

</ln>

<ln l="1411" t="4565" r="5803" b="4762" baseLine="4709" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="4565" r="1733" b="4718">that</wd>

<space/>

<wd l="1781" t="4613" r="2246" b="4718">occur</wd>

<space/>

<wd l="2294" t="4565" r="2458" b="4714">in</wd>

<space/>

<wd l="2506" t="4565" r="2760" b="4718">the</wd>

<space/>

<wd l="2813" t="4565" r="3691" b="4762">dictionary.</wd>

<space/>

<wd l="3768" t="4570" r="4061" b="4718">For</wd>

<space/>

<wd l="4114" t="4565" r="4862" b="4762">example,</wd>

<space/>

<wd l="4920" t="4565" r="5064" b="4714">if</wd>

<space/>

<wd l="5098" t="4565" r="5352" b="4718">the</wd>

<space/>

<wd l="5405" t="4565" r="5803" b="4762">orig-</wd>

</ln>

<ln l="1411" t="4824" r="5803" b="5006" baseLine="4973" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="4824" r="1728" b="4978">inal</wd>

<space/>

<wd l="1790" t="4824" r="2261" b="4978">token</wd>

<space/>

<wd l="2323" t="4824" r="2458" b="4978">is</wd>

<space/>

<wd l="2530" t="4824" r="3226" b="5006">“thaatt”,</wd>

<space/>

<wd l="3298" t="4872" r="3542" b="4978">we</wd>

<space/>

<wd l="3614" t="4824" r="4325" b="4978">consider</wd>

<space/>

<wd l="4392" t="4824" r="5088" b="5006">“thaatt”,</wd>

<space/>

<wd l="5165" t="4824" r="5803" b="5006">“thaat”,</wd>

<space/>

</ln>

<ln l="1416" t="5088" r="5813" b="5285" baseLine="5237" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="5088" r="2021" b="5270">“thatt”,</wd>

<space/>

<wd l="2112" t="5088" r="2410" b="5242">and</wd>

<space/>

<wd l="2486" t="5088" r="3029" b="5270">“that”,</wd>

<space/>

<wd l="3115" t="5088" r="3384" b="5242">but</wd>

<space/>

<wd l="3451" t="5112" r="3955" b="5242">return</wd>

<space/>

<wd l="4032" t="5088" r="4397" b="5285">only</wd>

<space/>

<wd l="4478" t="5088" r="5021" b="5270">“that”,</wd>

<space/>

<wd l="5107" t="5088" r="5573" b="5285">being</wd>

<space/>

<wd l="5650" t="5088" r="5813" b="5237">in</wd>

<space/>

</ln>

<ln l="1411" t="5352" r="2606" b="5549" baseLine="5496" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="5352" r="1670" b="5506">the</wd>

<space/>

<wd l="1728" t="5352" r="2606" b="5549">dictionary.</wd>

</ln>

</para>

<para l="1411" t="5616" r="5813" b="7656" alignment="justified" spaceBefore="3" fli="216" lsp="exactly" lspExact="262" language="en">

<ln l="1622" t="5616" r="5813" b="5813" baseLine="5765" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1622" t="5616" r="2131" b="5770">When</wd>

<space/>

<wd l="2203" t="5616" r="2458" b="5770">the</wd>

<space/>

<wd l="2539" t="5640" r="3120" b="5813">system</wd>

<space/>

<wd l="3187" t="5616" r="3326" b="5770">is</wd>

<space/>

<wd l="3403" t="5664" r="3682" b="5770">run</wd>

<space/>

<wd l="3754" t="5616" r="4128" b="5770">with</wd>

<space/>

<wd l="4205" t="5616" r="4574" b="5813">only</wd>

<space/>

<wd l="4646" t="5616" r="4906" b="5770">the</wd>

<space/>

<wd l="4982" t="5616" r="5429" b="5813">“ing”</wd>

<space/>

<wd l="5510" t="5616" r="5813" b="5770">and</wd>

<space/>

</ln>

<ln l="1416" t="5880" r="5808" b="6077" baseLine="6024" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1416" t="5880" r="2064" b="6034">“coool”</wd>

<space/>

<wd l="2136" t="5880" r="2592" b="6062">rules,</wd>

<space/>

<wd l="2674" t="5880" r="3019" b="6077">plus</wd>

<space/>

<wd l="3091" t="5880" r="3346" b="6034">the</wd>

<space/>

<wd l="3422" t="5904" r="4142" b="6034">sentence</wd>

<space/>

<wd l="4210" t="5880" r="4613" b="6034">level</wd>

<space/>

<wd l="4680" t="5880" r="5486" b="6062">re-ranker,</wd>

<space/>

<wd l="5568" t="5928" r="5808" b="6034">we</wd>

<space/>

</ln>

<ln l="1416" t="6144" r="5803" b="6341" baseLine="6288">

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1416" t="6168" r="1675" b="6341">get</wd>

<space/>

<wd l="1738" t="6192" r="1829" b="6298">a</wd>

<space/>

<wd l="1891" t="6144" r="2664" b="6341">precision</wd>

<space/>

<wd l="2731" t="6144" r="2914" b="6298">of</wd>

<space/>

</run>

<wd l="2981" t="6149" r="3221" b="6298"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">.</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">81</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="3302" t="6144" r="3605" b="6298">and</wd>

<space/>

<wd l="3672" t="6192" r="3763" b="6298">a</wd>

<space/>

<wd l="3821" t="6144" r="4291" b="6298">recall</wd>

<space/>

<wd l="4358" t="6144" r="4541" b="6298">of</wd>

<space/>

</run>

<wd l="4603" t="6149" r="4901" b="6298"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">.</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">09.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="5006" t="6149" r="5803" b="6326">However,</wd>

<space/>

</run>

</ln>

<ln l="1411" t="6403" r="5803" b="6600" baseLine="6552" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1411" t="6403" r="1872" b="6557">when</wd>

<space/>

<wd l="1925" t="6403" r="2755" b="6557">combined</wd>

<space/>

<wd l="2808" t="6403" r="3182" b="6557">with</wd>

<space/>

<wd l="3235" t="6403" r="3490" b="6557">the</wd>

<space/>

<wd l="3542" t="6427" r="3854" b="6557">rest</wd>

<space/>

<wd l="3902" t="6403" r="4085" b="6557">of</wd>

<space/>

<wd l="4128" t="6403" r="4382" b="6557">the</wd>

<space/>

<wd l="4445" t="6427" r="5069" b="6600">system,</wd>

<space/>

<wd l="5126" t="6403" r="5381" b="6557">it’s</wd>

<space/>

<wd l="5443" t="6451" r="5803" b="6557">con-</wd>

</ln>

<ln l="1411" t="6667" r="5803" b="6864" baseLine="6816" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1411" t="6667" r="2136" b="6821">tribution</wd>

<space/>

<wd l="2198" t="6667" r="2333" b="6821">is</wd>

<space/>

<wd l="2395" t="6667" r="3480" b="6864">insignificant.</wd>

<space/>

<wd l="3581" t="6672" r="3710" b="6821">It</wd>

<space/>

<wd l="3778" t="6715" r="4282" b="6821">seems</wd>

<space/>

<wd l="4344" t="6667" r="4666" b="6821">that</wd>

<space/>

<wd l="4718" t="6667" r="4978" b="6821">the</wd>

<space/>

<wd l="5035" t="6691" r="5453" b="6821">most</wd>

<space/>

<wd l="5506" t="6667" r="5803" b="6821">fre-</wd>

</ln>

<ln l="1416" t="6931" r="5803" b="7128" baseLine="7075" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1416" t="6955" r="1886" b="7128">quent</wd>

<space/>

<wd l="1934" t="6931" r="2707" b="7085">instances</wd>

<space/>

<wd l="2765" t="6931" r="2947" b="7085">of</wd>

<space/>

<wd l="2990" t="6931" r="3422" b="7085">these</wd>

<space/>

<wd l="3475" t="6931" r="3883" b="7085">rules</wd>

<space/>

<wd l="3946" t="6979" r="4195" b="7085">are</wd>

<space/>

<wd l="4253" t="6931" r="4867" b="7128">already</wd>

<space/>

<wd l="4925" t="6931" r="5088" b="7080">in</wd>

<space/>

<wd l="5141" t="6931" r="5395" b="7085">the</wd>

<space/>

<wd l="5458" t="6931" r="5803" b="7085">sub-</wd>

</ln>

<ln l="1421" t="7195" r="5808" b="7392" baseLine="7339" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="7195" r="2107" b="7349">stitution</wd>

<space/>

<wd l="2150" t="7195" r="2453" b="7378">list,</wd>

<space/>

<wd l="2515" t="7243" r="2688" b="7349">so</wd>

<space/>

<wd l="2736" t="7195" r="2990" b="7349">the</wd>

<space/>

<wd l="3038" t="7195" r="3442" b="7349">rules</wd>

<space/>

<wd l="3494" t="7195" r="3701" b="7349">do</wd>

<space/>

<wd l="3744" t="7219" r="4022" b="7349">not</wd>

<space/>

<wd l="4066" t="7219" r="4771" b="7392">generate</wd>

<space/>

<wd l="4824" t="7195" r="5443" b="7392">enough</wd>

<space/>

<wd l="5482" t="7219" r="5808" b="7349">true</wd>

<space/>

</ln>

<ln l="1411" t="7459" r="5506" b="7656" baseLine="7603" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1411" t="7459" r="2150" b="7656">positives</wd>

<space/>

<wd l="2208" t="7483" r="2371" b="7613">to</wd>

<space/>

<wd l="2434" t="7459" r="2909" b="7613">offset</wd>

<space/>

<wd l="2957" t="7459" r="3346" b="7613">their</wd>

<space/>

<wd l="3403" t="7459" r="4219" b="7656">generated</wd>

<space/>

<wd l="4272" t="7459" r="4670" b="7613">false</wd>

<space/>

<wd l="4723" t="7459" r="5506" b="7656">positives.</wd>

</ln>

</para>

<para l="1411" t="7723" r="5813" b="11074" alignment="justified" fli="216" lsp="exactly" lspExact="263" language="en">

<ln l="1622" t="7723" r="5803" b="7920" baseLine="7867" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1622" t="7723" r="2150" b="7920">Along</wd>

<space/>

<wd l="2232" t="7723" r="2606" b="7877">with</wd>

<space/>

<wd l="2693" t="7723" r="3139" b="7920">“ing”</wd>

<space/>

<wd l="3230" t="7723" r="3528" b="7877">and</wd>

<space/>

<wd l="3610" t="7723" r="4310" b="7906">“coool”,</wd>

<space/>

<wd l="4402" t="7771" r="4646" b="7877">we</wd>

<space/>

<wd l="4728" t="7723" r="5117" b="7877">tried</wd>

<space/>

<wd l="5198" t="7771" r="5290" b="7877">a</wd>

<space/>

<wd l="5366" t="7771" r="5803" b="7877">num-</wd>

</ln>

<ln l="1411" t="7982" r="5803" b="8179" baseLine="8131" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="7982" r="1685" b="8136">ber</wd>

<space/>

<wd l="1742" t="7982" r="1925" b="8136">of</wd>

<space/>

<wd l="1982" t="7982" r="2558" b="8136">similar</wd>

<space/>

<wd l="2616" t="7982" r="3494" b="8136">rule-based</wd>

<space/>

<wd l="3557" t="8006" r="4613" b="8179">components.</wd>

<space/>

<wd l="4699" t="7987" r="4992" b="8136">For</wd>

<space/>

<wd l="5050" t="7982" r="5803" b="8179">example,</wd>

<space/>

</ln>

<ln l="1411" t="8246" r="5803" b="8443" baseLine="8395" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="8294" r="1656" b="8400">we</wd>

<space/>

<wd l="1742" t="8246" r="2314" b="8400">looked</wd>

<space/>

<wd l="2395" t="8246" r="2645" b="8400">for</wd>

<space/>

<wd l="2726" t="8294" r="3163" b="8400">cases</wd>

<space/>

<wd l="3250" t="8246" r="3763" b="8400">where</wd>

<space/>

<wd l="3850" t="8246" r="4190" b="8400">“th”</wd>

<space/>

<wd l="4277" t="8246" r="4416" b="8400">is</wd>

<space/>

<wd l="4502" t="8246" r="5218" b="8443">replaced</wd>

<space/>

<wd l="5299" t="8246" r="5506" b="8443">by</wd>

<space/>

<wd l="5597" t="8246" r="5803" b="8400">ei-</wd>

</ln>

<ln l="1411" t="8510" r="5808" b="8707" baseLine="8654" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="8510" r="1742" b="8664">ther</wd>

<space/>

<wd l="1795" t="8510" r="2126" b="8693">“d”,</wd>

<space/>

<wd l="2194" t="8510" r="2491" b="8693">“f”,</wd>

<space/>

<wd l="2554" t="8558" r="2726" b="8664">or</wd>

<space/>

<wd l="2779" t="8515" r="3058" b="8664">“t”.</wd>

<space/>

<wd l="3139" t="8510" r="3835" b="8664">Another</wd>

<space/>

<wd l="3888" t="8510" r="4594" b="8707">example</wd>

<space/>

<wd l="4646" t="8510" r="4781" b="8664">is</wd>

<space/>

<wd l="4834" t="8510" r="5093" b="8664">the</wd>

<space/>

<wd l="5146" t="8510" r="5808" b="8664">“double</wd>

<space/>

</ln>

<ln l="1416" t="8774" r="5813" b="8957" baseLine="8918" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="8779" r="2357" b="8928">consonant”</wd>

<space/>

<wd l="2414" t="8774" r="2789" b="8957">rule,</wd>

<space/>

<wd l="2851" t="8774" r="3331" b="8928">based</wd>

<space/>

<wd l="3389" t="8822" r="3595" b="8928">on</wd>

<space/>

<wd l="3643" t="8774" r="3902" b="8928">the</wd>

<space/>

<wd l="3955" t="8774" r="4310" b="8928">idea</wd>

<space/>

<wd l="4358" t="8774" r="4680" b="8928">that</wd>

<space/>

<wd l="4728" t="8774" r="5184" b="8928">when</wd>

<space/>

<wd l="5242" t="8822" r="5333" b="8928">a</wd>

<space/>

<wd l="5381" t="8774" r="5813" b="8928">word</wd>

<space/>

</ln>

<ln l="1416" t="9038" r="5813" b="9221" baseLine="9182" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="9038" r="1795" b="9192">ends</wd>

<space/>

<wd l="1882" t="9038" r="2256" b="9192">with</wd>

<space/>

<wd l="2338" t="9062" r="2650" b="9192">two</wd>

<space/>

<wd l="2741" t="9062" r="3720" b="9221">consonants,</wd>

<space/>

<wd l="3821" t="9038" r="4118" b="9192">and</wd>

<space/>

<wd l="4200" t="9038" r="4579" b="9192">both</wd>

<space/>

<wd l="4661" t="9086" r="4915" b="9192">are</wd>

<space/>

<wd l="4997" t="9038" r="5558" b="9192">voiced</wd>

<space/>

<wd l="5640" t="9086" r="5813" b="9192">or</wd>

<space/>

</ln>

<ln l="1411" t="9298" r="5798" b="9494" baseLine="9446" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="9298" r="1790" b="9451">both</wd>

<space/>

<wd l="1843" t="9346" r="2098" b="9451">are</wd>

<space/>

<wd l="2150" t="9298" r="2957" b="9480">unvoiced,</wd>

<space/>

<wd l="3019" t="9298" r="3274" b="9451">the</wd>

<space/>

<wd l="3336" t="9298" r="3912" b="9451">second</wd>

<space/>

<wd l="3970" t="9322" r="4829" b="9451">consonant</wd>

<space/>

<wd l="4877" t="9298" r="5011" b="9451">is</wd>

<space/>

<wd l="5074" t="9298" r="5798" b="9494">dropped.</wd>

<space/>

</ln>

<ln l="1411" t="9562" r="5808" b="9758" baseLine="9706" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="9566" r="1704" b="9715">For</wd>

<space/>

<wd l="1805" t="9562" r="2510" b="9758">example</wd>

<space/>

<wd l="2616" t="9562" r="3211" b="9715">“wrist”</wd>

<space/>

<wd l="3317" t="9562" r="4051" b="9715">becomes</wd>

<space/>

<wd l="4157" t="9562" r="4742" b="9715">“wris”.</wd>

<space/>

<wd l="4954" t="9562" r="5458" b="9715">These</wd>

<space/>

<wd l="5558" t="9610" r="5808" b="9715">are</wd>

<space/>

</ln>

<ln l="1411" t="9826" r="5813" b="10022" baseLine="9970" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="9826" r="2376" b="10022">widespread</wd>

<space/>

<wd l="2434" t="9826" r="3451" b="10022">phenomena,</wd>

<space/>

<wd l="3518" t="9826" r="3787" b="9979">but</wd>

<space/>

<wd l="3845" t="9850" r="4118" b="9979">not</wd>

<space/>

<wd l="4171" t="9826" r="5131" b="10022">widespread</wd>

<space/>

<wd l="5194" t="9826" r="5813" b="10022">enough</wd>

<space/>

</ln>

<ln l="1416" t="10090" r="5808" b="10286" baseLine="10234" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="10138" r="1622" b="10243">on</wd>

<space/>

<wd l="1670" t="10090" r="2275" b="10243">Twitter</wd>

<space/>

<wd l="2318" t="10090" r="2568" b="10243">for</wd>

<space/>

<wd l="2611" t="10090" r="2866" b="10243">the</wd>

<space/>

<wd l="2914" t="10118" r="3240" b="10243">true</wd>

<space/>

<wd l="3283" t="10090" r="4022" b="10286">positives</wd>

<space/>

<wd l="4075" t="10114" r="4234" b="10243">to</wd>

<space/>

<wd l="4286" t="10090" r="5069" b="10286">outweigh</wd>

<space/>

<wd l="5112" t="10090" r="5371" b="10243">the</wd>

<space/>

<wd l="5414" t="10090" r="5808" b="10243">false</wd>

<space/>

</ln>

<ln l="1411" t="10349" r="5803" b="10546" baseLine="10498" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="10349" r="2194" b="10546">positives.</wd>

<space/>

<wd l="2275" t="10354" r="2429" b="10498">A</wd>

<space/>

<wd l="2477" t="10397" r="2909" b="10502">more</wd>

<space/>

<wd l="2966" t="10349" r="3686" b="10502">sensitive</wd>

<space/>

<wd l="3739" t="10349" r="4066" b="10502">rule</wd>

<space/>

<wd l="4123" t="10397" r="4296" b="10502">or</wd>

<space/>

<wd l="4344" t="10397" r="4435" b="10502">a</wd>

<space/>

<wd l="4483" t="10349" r="4968" b="10502">better</wd>

<space/>

<wd l="5026" t="10373" r="5803" b="10502">sentence-</wd>

</ln>

<ln l="1411" t="10613" r="5803" b="10766" baseLine="10757" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="10613" r="1814" b="10766">level</wd>

<space/>

<wd l="1867" t="10613" r="2640" b="10766">re-ranker</wd>

<space/>

<wd l="2693" t="10613" r="3216" b="10766">would</wd>

<space/>

<wd l="3269" t="10613" r="3466" b="10766">be</wd>

<space/>

<wd l="3518" t="10613" r="4118" b="10766">needed</wd>

<space/>

<wd l="4171" t="10637" r="4330" b="10766">to</wd>

<space/>

<wd l="4387" t="10613" r="4838" b="10766">make</wd>

<space/>

<wd l="4891" t="10613" r="5323" b="10766">these</wd>

<space/>

<wd l="5381" t="10661" r="5803" b="10766">com-</wd>

</ln>

<ln l="1411" t="10877" r="2986" b="11074" baseLine="11021" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="10901" r="2064" b="11074">ponents</wd>

<space/>

<wd l="2126" t="10877" r="2986" b="11030">beneficial.</wd>

</ln>

</para>

<para l="1411" t="11318" r="4152" b="11472" alignment="left" spaceBefore="198" lsp="exactly" lspExact="248" language="en">

<ln l="1411" t="11318" r="4152" b="11472" baseLine="11462" bold="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="6">

<wd l="1411" t="11318" r="1670" b="11472">2.3</wd>

<space/>

<wd l="1896" t="11318" r="2688" b="11472">Sentence</wd>

<space/>

<wd l="2741" t="11323" r="3230" b="11472">Level</wd>

<space/>

<wd l="3288" t="11323" r="4152" b="11472">Reranker</wd>

</ln>

</para>

<para l="1411" t="11659" r="5818" b="12384" alignment="justified" spaceBefore="75" lsp="exactly" lspExact="263" language="en">

<ln l="1411" t="11659" r="5803" b="11856" baseLine="11808" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="11659" r="1738" b="11813">The</wd>

<space/>

<wd l="1795" t="11659" r="2194" b="11813">third</wd>

<space/>

<wd l="2246" t="11659" r="2741" b="11856">major</wd>

<space/>

<wd l="2794" t="11683" r="3734" b="11856">component</wd>

<space/>

<wd l="3787" t="11659" r="3970" b="11813">of</wd>

<space/>

<wd l="4008" t="11659" r="4267" b="11813">the</wd>

<space/>

<wd l="4330" t="11683" r="4910" b="11856">system</wd>

<space/>

<wd l="4958" t="11659" r="5093" b="11813">is</wd>

<space/>

<wd l="5150" t="11659" r="5405" b="11813">the</wd>

<space/>

<wd l="5472" t="11707" r="5803" b="11813">sen-</wd>

</ln>

<ln l="1411" t="11923" r="5818" b="12120" baseLine="12067" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="11947" r="1858" b="12077">tence</wd>

<space/>

<wd l="1954" t="11923" r="3226" b="12077">level-re-ranker.</wd>

<space/>

<wd l="3427" t="11923" r="3797" b="12077">This</wd>

<space/>

<wd l="3893" t="11923" r="4080" b="12106">is,</wd>

<space/>

<wd l="4190" t="11923" r="4354" b="12072">in</wd>

<space/>

<wd l="4459" t="11923" r="4915" b="12106">short,</wd>

<space/>

<wd l="5030" t="11971" r="5122" b="12077">a</wd>

<space/>

<wd l="5213" t="11923" r="5818" b="12120">bigram</wd>

<space/>

</ln>

<ln l="1411" t="12187" r="2914" b="12384" baseLine="12331" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="12187" r="1997" b="12341">Viterbi</wd>

<space/>

<wd l="2059" t="12187" r="2914" b="12384">algorithm.</wd>

</ln>

</para>

<para l="1411" t="12451" r="5818" b="13963" alignment="justified" spaceBefore="1" fli="216" lsp="exactly" lspExact="263" language="en">

<ln l="1622" t="12451" r="5803" b="12648" baseLine="12595" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1622" t="12451" r="2338" b="12648">Bigrams</wd>

<space/>

<wd l="2414" t="12499" r="2822" b="12605">were</wd>

<space/>

<wd l="2904" t="12451" r="3662" b="12605">collected</wd>

<space/>

<wd l="3734" t="12451" r="4147" b="12605">from</wd>

<space/>

<wd l="4224" t="12499" r="4502" b="12605">our</wd>

<space/>

<wd l="4584" t="12475" r="4810" b="12605">set</wd>

<space/>

<wd l="4886" t="12451" r="5069" b="12605">of</wd>

<space/>

<wd l="5131" t="12475" r="5386" b="12605">ten</wd>

<space/>

<wd l="5462" t="12451" r="5803" b="12600">mil-</wd>

</ln>

<ln l="1411" t="12715" r="5803" b="12912" baseLine="12859" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="12715" r="1742" b="12869">lion</wd>

<space/>

<wd l="1795" t="12739" r="2376" b="12869">tweets.</wd>

<space/>

<wd l="2453" t="12715" r="3163" b="12912">Bigrams</wd>

<space/>

<wd l="3221" t="12715" r="3600" b="12869">with</wd>

<space/>

<wd l="3658" t="12763" r="3950" b="12912">any</wd>

<space/>

<wd l="4013" t="12715" r="5520" b="12912">out-of-vocabulary</wd>

<space/>

<wd l="5578" t="12739" r="5803" b="12869">to-</wd>

</ln>

<ln l="1411" t="12979" r="5818" b="13176" baseLine="13123" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="12979" r="1795" b="13133">kens</wd>

<space/>

<wd l="1867" t="13027" r="2280" b="13133">were</wd>

<space/>

<wd l="2352" t="12979" r="3038" b="13176">ignored.</wd>

<space/>

<wd l="3168" t="12984" r="3634" b="13133">From</wd>

<space/>

<wd l="3701" t="12979" r="3998" b="13133">this</wd>

<space/>

<wd l="4085" t="13003" r="4354" b="13162">set,</wd>

<space/>

<wd l="4440" t="12979" r="4685" b="13133">for</wd>

<space/>

<wd l="4762" t="12979" r="5141" b="13133">each</wd>

<space/>

<wd l="5213" t="12979" r="5818" b="13176">bigram</wd>

<space/>

</ln>

<ln l="1430" t="13224" r="5808" b="13445" baseLine="13389">

<wd l="1430" t="13224" r="1709" b="13445"><run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="750" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">1</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="1762" t="13224" r="2050" b="13440"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="750" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">2</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">),</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2170" t="13286" r="2414" b="13392">we</wd>

<space/>

<wd l="2520" t="13238" r="3350" b="13435">computed</wd>

<space/>

</run>

<wd l="3437" t="13224" r="4387" b="13445"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">prob</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="750" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">2</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1250" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">|</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">t</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="750" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">1</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">)</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4507" t="13238" r="4882" b="13392">with</wd>

<space/>

<wd l="4978" t="13238" r="5808" b="13435">Laplacian</wd>

<space/>

</run>

</ln>

<ln l="1421" t="13502" r="5803" b="13699" baseLine="13646" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="13502" r="2347" b="13699">smoothing.</wd>

<space/>

<wd l="2482" t="13502" r="2986" b="13656">These</wd>

<space/>

<wd l="3058" t="13502" r="3701" b="13656">became</wd>

<space/>

<wd l="3778" t="13502" r="4032" b="13656">the</wd>

<space/>

<wd l="4109" t="13502" r="4906" b="13656">transition</wd>

<space/>

<wd l="4978" t="13502" r="5803" b="13699">probabili-</wd>

</ln>

<ln l="1411" t="13766" r="3696" b="13963" baseLine="13910" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="13766" r="1699" b="13920">ties</wd>

<space/>

<wd l="1762" t="13766" r="1925" b="13915">in</wd>

<space/>

<wd l="1982" t="13814" r="2261" b="13920">our</wd>

<space/>

<wd l="2314" t="13766" r="2899" b="13920">Viterbi</wd>

<space/>

<wd l="2952" t="13766" r="3696" b="13963">problem.</wd>

</ln>

</para>

<para l="1411" t="14030" r="5813" b="14755" alignment="justified" spaceBefore="3" spaceAfter="1" fli="216" lsp="exactly" lspExact="263" language="en">

<ln l="1622" t="14030" r="5813" b="14227" baseLine="14174" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1622" t="14035" r="1838" b="14184">At</wd>

<space/>

<wd l="1930" t="14054" r="2227" b="14184">test</wd>

<space/>

<wd l="2314" t="14030" r="2736" b="14213">time,</wd>

<space/>

<wd l="2846" t="14078" r="3091" b="14184">we</wd>

<space/>

<wd l="3192" t="14030" r="4008" b="14227">generated</wd>

<space/>

<wd l="4104" t="14030" r="4987" b="14184">candidates</wd>

<space/>

<wd l="5088" t="14030" r="5333" b="14184">for</wd>

<space/>

<wd l="5429" t="14030" r="5813" b="14184">each</wd>

<space/>

</ln>

<ln l="1411" t="14294" r="5808" b="14491" baseLine="14438" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1411" t="14294" r="1920" b="14448">token.</wd>

<space/>

<wd l="2098" t="14294" r="2251" b="14443">If</wd>

<space/>

<wd l="2323" t="14294" r="2578" b="14448">the</wd>

<space/>

<wd l="2678" t="14294" r="3653" b="14448">substitution</wd>

<space/>

<wd l="3739" t="14294" r="4003" b="14448">list</wd>

<space/>

<wd l="4085" t="14294" r="4392" b="14448">had</wd>

<space/>

<wd l="4478" t="14342" r="4670" b="14448">an</wd>

<space/>

<wd l="4766" t="14318" r="5189" b="14491">entry</wd>

<space/>

<wd l="5280" t="14294" r="5530" b="14448">for</wd>

<space/>

<wd l="5616" t="14342" r="5808" b="14448">an</wd>

<space/>

</ln>

<ln l="1416" t="14558" r="5803" b="14755" baseLine="14702" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="14558" r="2069" b="14755">original</wd>

<space/>

<wd l="2122" t="14558" r="2635" b="14741">token,</wd>

<space/>

<wd l="2698" t="14558" r="2899" b="14712">all</wd>

<space/>

<wd l="2966" t="14558" r="3792" b="14755">suggested</wd>

<space/>

<wd l="3850" t="14558" r="4906" b="14712">substitutions</wd>

<space/>

<wd l="4958" t="14558" r="5126" b="14707">in</wd>

<space/>

<wd l="5174" t="14558" r="5496" b="14712">that</wd>

<space/>

<wd l="5549" t="14606" r="5803" b="14712">en-</wd>

</ln>

<ln l="0" t="0" r="0" b="0" baseLine="0" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<space/>

</ln>

</para>

</column>

<column l="6078" t="2141" r="10513" b="14764">

<para l="6091" t="2198" r="10483" b="2659" alignment="justified" spaceBefore="1" lsp="exactly" lspExact="262" language="en">

<ln l="6091" t="2198" r="10483" b="2395" baseLine="2342" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="8">

<wd l="6091" t="2222" r="6322" b="2395">try</wd>

<space/>

<wd l="6422" t="2198" r="7066" b="2352">became</wd>

<space/>

<wd l="7166" t="2198" r="8093" b="2352">candidates.</wd>

<space/>

<wd l="8299" t="2203" r="8592" b="2352">For</wd>

<space/>

<wd l="8688" t="2198" r="9072" b="2352">each</wd>

<space/>

<wd l="9168" t="2198" r="9350" b="2352">of</wd>

<space/>

<wd l="9437" t="2198" r="9869" b="2352">these</wd>

<space/>

<wd l="9970" t="2198" r="10483" b="2352">candi-</wd>

</ln>

<ln l="6096" t="2462" r="10483" b="2659" baseLine="2606">

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="8"><wd l="6096" t="2462" r="6571" b="2645">dates,</wd>

<space/>

</run>

<wd l="6682" t="2515" r="6811" b="2645"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="8">c</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="8">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="8"><space/>

<wd l="6917" t="2510" r="7162" b="2616">we</wd>

<space/>

<wd l="7253" t="2462" r="7987" b="2616">initialize</wd>

<space/>

<wd l="8083" t="2510" r="8174" b="2616">a</wd>

<space/>

<wd l="8261" t="2462" r="8880" b="2659">weight,</wd>

<space/>

</run>

<wd l="8990" t="2515" r="9264" b="2645"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="8">w</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="8">,</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="8">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="8"><space/>

<wd l="9370" t="2462" r="9883" b="2616">where</wd>

<space/>

</run>

<wd l="9979" t="2515" r="10200" b="2645"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="8">w</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="8">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="8"><space/>

<wd l="10339" t="2530" r="10483" b="2587">=</wd>

</run>

</ln>

</para>

<para l="6091" t="2707" r="10502" b="5813" alignment="justified" lsp="exactly" lspExact="262" language="en">

<ln l="6101" t="2707" r="10483" b="2923" baseLine="2870">

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6101" t="2726" r="6187" b="2870">2</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1250" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2"><wd l="6293" t="2765" r="6398" b="2870">×</wd>

<space/>

</run>

<wd l="6514" t="2707" r="7272" b="2923"><run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">rank</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">c</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">)</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="7368" t="2746" r="7512" b="2890">+</wd>

<space/>

<wd l="7603" t="2707" r="7819" b="2923">1),</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="7920" t="2722" r="8222" b="2875">and</wd>

<space/>

</run>

<wd l="8304" t="2707" r="8995" b="2923"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">rank</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">c</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">)</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="9091" t="2722" r="9571" b="2875">refers</wd>

<space/>

<wd l="9653" t="2746" r="9816" b="2875">to</wd>

<space/>

</run>

<wd l="9902" t="2726" r="10123" b="2875"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">c</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">’s</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="10210" t="2770" r="10483" b="2918">po-</wd>

</run>

</ln>

<ln l="6101" t="2986" r="10488" b="3182" baseLine="3134" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6101" t="2986" r="6562" b="3139">sition</wd>

<space/>

<wd l="6629" t="2986" r="6797" b="3134">in</wd>

<space/>

<wd l="6864" t="2986" r="7118" b="3139">the</wd>

<space/>

<wd l="7190" t="2986" r="7450" b="3139">list</wd>

<space/>

<wd l="7517" t="2986" r="7762" b="3139">for</wd>

<space/>

<wd l="7829" t="2986" r="8083" b="3139">the</wd>

<space/>

<wd l="8160" t="3010" r="8760" b="3139">current</wd>

<space/>

<wd l="8822" t="2986" r="9427" b="3139">token’s</wd>

<space/>

<wd l="9504" t="3010" r="9931" b="3182">entry</wd>

<space/>

<wd l="10003" t="2986" r="10166" b="3134">in</wd>

<space/>

<wd l="10234" t="2986" r="10488" b="3139">the</wd>

<space/>

</ln>

<ln l="6101" t="3250" r="10488" b="3446" baseLine="3394" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6101" t="3250" r="7080" b="3403">substitution</wd>

<space/>

<wd l="7133" t="3250" r="7430" b="3403">list.</wd>

<space/>

<wd l="7512" t="3250" r="7666" b="3398">If</wd>

<space/>

<wd l="7704" t="3250" r="7963" b="3403">the</wd>

<space/>

<wd l="8021" t="3250" r="8467" b="3446">“ing”</wd>

<space/>

<wd l="8530" t="3250" r="8856" b="3403">rule</wd>

<space/>

<wd l="8914" t="3298" r="9086" b="3403">or</wd>

<space/>

<wd l="9139" t="3250" r="9398" b="3403">the</wd>

<space/>

<wd l="9456" t="3250" r="10104" b="3403">“coool”</wd>

<space/>

<wd l="10162" t="3250" r="10488" b="3403">rule</wd>

<space/>

</ln>

<ln l="6096" t="3514" r="10478" b="3710" baseLine="3658" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6096" t="3514" r="6917" b="3710">generated</wd>

<space/>

<wd l="6984" t="3562" r="7704" b="3696">answers,</wd>

<space/>

<wd l="7781" t="3514" r="8222" b="3667">those</wd>

<space/>

<wd l="8290" t="3514" r="8818" b="3667">would</wd>

<space/>

<wd l="8885" t="3514" r="9216" b="3667">also</wd>

<space/>

<wd l="9283" t="3514" r="9480" b="3667">be</wd>

<space/>

<wd l="9552" t="3514" r="10478" b="3667">candidates.</wd>

<space/>

</ln>

<ln l="6091" t="3773" r="10502" b="3970" baseLine="3922" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6091" t="3778" r="6384" b="3926">For</wd>

<space/>

<wd l="6456" t="3773" r="6715" b="3926">the</wd>

<space/>

<wd l="6797" t="3773" r="7334" b="3926">“cool”</wd>

<space/>

<wd l="7416" t="3773" r="7790" b="3955">rule,</wd>

<space/>

<wd l="7877" t="3773" r="8136" b="3926">the</wd>

<space/>

<wd l="8213" t="3773" r="8789" b="3970">weight</wd>

<space/>

<wd l="8861" t="3821" r="9182" b="3926">was</wd>

<space/>

<wd l="9264" t="3773" r="9518" b="3926">the</wd>

<space/>

<wd l="9595" t="3773" r="10243" b="3926">number</wd>

<space/>

<wd l="10320" t="3773" r="10502" b="3926">of</wd>

<space/>

</ln>

<ln l="6096" t="4037" r="10483" b="4234" baseLine="4186" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6096" t="4037" r="6850" b="4190">deletions</wd>

<space/>

<wd l="6902" t="4037" r="7608" b="4234">required</wd>

<space/>

<wd l="7651" t="4061" r="7810" b="4190">to</wd>

<space/>

<wd l="7867" t="4061" r="8122" b="4234">get</wd>

<space/>

<wd l="8165" t="4037" r="8419" b="4190">the</wd>

<space/>

<wd l="8472" t="4037" r="9274" b="4190">candidate</wd>

<space/>

<wd l="9322" t="4037" r="9734" b="4190">from</wd>

<space/>

<wd l="9778" t="4037" r="10032" b="4190">the</wd>

<space/>

<wd l="10085" t="4037" r="10483" b="4234">orig-</wd>

</ln>

<ln l="6091" t="4301" r="10483" b="4498" baseLine="4445" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6091" t="4301" r="6408" b="4454">inal</wd>

<space/>

<wd l="6456" t="4301" r="6965" b="4454">token.</wd>

<space/>

<wd l="7042" t="4306" r="7334" b="4454">For</wd>

<space/>

<wd l="7378" t="4301" r="7637" b="4454">the</wd>

<space/>

<wd l="7690" t="4301" r="8136" b="4498">“ing”</wd>

<space/>

<wd l="8189" t="4301" r="8563" b="4483">rule,</wd>

<space/>

<wd l="8621" t="4301" r="8875" b="4454">the</wd>

<space/>

<wd l="8923" t="4301" r="9504" b="4498">weight</wd>

<space/>

<wd l="9547" t="4349" r="9917" b="4483">was,</wd>

<space/>

<wd l="9984" t="4349" r="10483" b="4454">some-</wd>

</ln>

<ln l="6091" t="4565" r="10493" b="4762" baseLine="4709">

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6091" t="4565" r="6509" b="4718">what</wd>

<space/>

<wd l="6586" t="4565" r="7454" b="4762">arbitrarily,</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="7565" t="4570" r="7694" b="4718">1.</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="7843" t="4565" r="8472" b="4762">Finally,</wd>

<space/>

<wd l="8563" t="4565" r="8822" b="4718">the</wd>

<space/>

<wd l="8904" t="4565" r="9557" b="4762">original</wd>

<space/>

<wd l="9634" t="4565" r="10104" b="4718">token</wd>

<space/>

<wd l="10181" t="4565" r="10315" b="4718">is</wd>

<space/>

<wd l="10402" t="4613" r="10493" b="4718">a</wd>

<space/>

</run>

</ln>

<ln l="6096" t="4824" r="10488" b="5021" baseLine="4973">

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6096" t="4824" r="6902" b="4978">candidate</wd>

<space/>

<wd l="6974" t="4824" r="7354" b="4978">with</wd>

<space/>

<wd l="7430" t="4872" r="7522" b="4978">a</wd>

<space/>

<wd l="7594" t="4824" r="8170" b="5021">weight</wd>

<space/>

<wd l="8246" t="4824" r="8429" b="4978">of</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="8496" t="4829" r="8635" b="4978">0.</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="8774" t="4824" r="9274" b="4978">These</wd>

<space/>

<wd l="9350" t="4824" r="10003" b="5021">weights</wd>

<space/>

<wd l="10080" t="4872" r="10488" b="4978">were</wd>

<space/>

</run>

</ln>

<ln l="6091" t="5088" r="10488" b="5285" baseLine="5237" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6091" t="5088" r="6350" b="5242">the</wd>

<space/>

<wd l="6403" t="5088" r="7152" b="5242">emission</wd>

<space/>

<wd l="7195" t="5088" r="7848" b="5285">weights</wd>

<space/>

<wd l="7901" t="5088" r="8150" b="5242">for</wd>

<space/>

<wd l="8194" t="5088" r="8827" b="5270">Viterbi,</wd>

<space/>

<wd l="8890" t="5088" r="9187" b="5242">and</wd>

<space/>

<wd l="9235" t="5136" r="9643" b="5242">were</wd>

<space/>

<wd l="9691" t="5088" r="10267" b="5242">treated</wd>

<space/>

<wd l="10320" t="5136" r="10488" b="5242">as</wd>

<space/>

</ln>

<ln l="6106" t="5338" r="10498" b="5554" baseLine="5496">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1250" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2"><wd l="6106" t="5443" r="6240" b="5453">−</wd>

<space/>

</run>

<wd l="6298" t="5338" r="7426" b="5554"><run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">log(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">prob</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">c</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">)).</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="7555" t="5357" r="7848" b="5506">For</wd>

<space/>

<wd l="7920" t="5352" r="8304" b="5506">each</wd>

<space/>

<wd l="8376" t="5352" r="9029" b="5549">original</wd>

<space/>

<wd l="9101" t="5352" r="9566" b="5506">token</wd>

<space/>

<wd l="9638" t="5352" r="9802" b="5501">in</wd>

<space/>

<wd l="9869" t="5352" r="10128" b="5506">the</wd>

<space/>

<wd l="10200" t="5376" r="10498" b="5506">test</wd>

<space/>

</run>

</ln>

<ln l="6101" t="5616" r="10320" b="5813" baseLine="5760">

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6101" t="5640" r="6370" b="5798">set,</wd>

<space/>

<wd l="6432" t="5664" r="6677" b="5770">we</wd>

<space/>

<wd l="6739" t="5616" r="7555" b="5813">generated</wd>

<space/>

<wd l="7613" t="5664" r="7819" b="5770">on</wd>

<space/>

<wd l="7877" t="5664" r="8520" b="5813">average</wd>

<space/>

</run>

<wd l="8592" t="5621" r="8842" b="5770"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">.</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">04</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="8904" t="5616" r="9336" b="5770">other</wd>

<space/>

<wd l="9394" t="5616" r="10320" b="5770">candidates.</wd>

</run>

</ln>

</para>

<para l="6091" t="5880" r="10493" b="7392" alignment="justified" fli="216" lsp="exactly" lspExact="262" language="en">

<ln l="6302" t="5880" r="10488" b="6077" baseLine="6024" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6302" t="5880" r="6629" b="6034">The</wd>

<space/>

<wd l="6734" t="5904" r="7315" b="6077">system</wd>

<space/>

<wd l="7406" t="5880" r="7771" b="6034">then</wd>

<space/>

<wd l="7867" t="5880" r="8842" b="6034">constructed</wd>

<space/>

<wd l="8938" t="5928" r="9029" b="6034">a</wd>

<space/>

<wd l="9120" t="5880" r="9634" b="6034">lattice</wd>

<space/>

<wd l="9730" t="5880" r="10142" b="6034">from</wd>

<space/>

<wd l="10234" t="5880" r="10488" b="6034">the</wd>

<space/>

</ln>

<ln l="6091" t="6144" r="10493" b="6298" baseLine="6288" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="6168" r="6557" b="6298">tweet</wd>

<space/>

<wd l="6624" t="6144" r="6922" b="6298">and</wd>

<space/>

<wd l="6994" t="6144" r="7195" b="6298">all</wd>

<space/>

<wd l="7272" t="6144" r="7459" b="6298">of</wd>

<space/>

<wd l="7512" t="6144" r="7709" b="6298">its</wd>

<space/>

<wd l="7781" t="6144" r="8957" b="6298">normalization</wd>

<space/>

<wd l="9029" t="6144" r="9955" b="6298">candidates.</wd>

<space/>

<wd l="10075" t="6144" r="10493" b="6298">With</wd>

<space/>

</ln>

<ln l="6091" t="6403" r="10483" b="6600" baseLine="6552" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="6403" r="6677" b="6557">Viterbi</wd>

<space/>

<wd l="6778" t="6403" r="7502" b="6600">dynamic</wd>

<space/>

<wd l="7598" t="6403" r="8746" b="6600">programming</wd>

<space/>

<wd l="8846" t="6403" r="8966" b="6557">it</wd>

<space/>

<wd l="9058" t="6403" r="9552" b="6557">found</wd>

<space/>

<wd l="9643" t="6403" r="9902" b="6557">the</wd>

<space/>

<wd l="9998" t="6403" r="10483" b="6557">maxi-</wd>

</ln>

<ln l="6091" t="6667" r="10493" b="6864" baseLine="6811" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="6715" r="6533" b="6821">mum</wd>

<space/>

<wd l="6590" t="6667" r="7512" b="6864">probability</wd>

<space/>

<wd l="7584" t="6667" r="7944" b="6864">path</wd>

<space/>

<wd l="8011" t="6667" r="8669" b="6864">through</wd>

<space/>

<wd l="8731" t="6667" r="8986" b="6821">the</wd>

<space/>

<wd l="9053" t="6667" r="9610" b="6821">lattice.</wd>

<space/>

<wd l="9715" t="6667" r="10258" b="6821">Words</wd>

<space/>

<wd l="10330" t="6667" r="10493" b="6816">in</wd>

<space/>

</ln>

<ln l="6091" t="6931" r="10493" b="7128" baseLine="7075" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="6931" r="6350" b="7085">the</wd>

<space/>

<wd l="6408" t="6931" r="7147" b="7085">maximal</wd>

<space/>

<wd l="7210" t="6931" r="7574" b="7128">path</wd>

<space/>

<wd l="7632" t="6979" r="8040" b="7085">were</wd>

<space/>

<wd l="8102" t="6931" r="8558" b="7085">taken</wd>

<space/>

<wd l="8616" t="6955" r="8774" b="7085">to</wd>

<space/>

<wd l="8837" t="6931" r="9034" b="7085">be</wd>

<space/>

<wd l="9096" t="6931" r="9350" b="7085">the</wd>

<space/>

<wd l="9418" t="6955" r="10003" b="7085">correct</wd>

<space/>

<wd l="10061" t="6931" r="10493" b="7085">word</wd>

<space/>

</ln>

<ln l="6091" t="7195" r="8472" b="7392" baseLine="7339" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="7195" r="6341" b="7349">for</wd>

<space/>

<wd l="6394" t="7195" r="6648" b="7349">the</wd>

<space/>

<wd l="6710" t="7195" r="7910" b="7392">corresponding</wd>

<space/>

<wd l="7968" t="7195" r="8472" b="7349">token.</wd>

</ln>

</para>

<para l="6091" t="7459" r="10498" b="12648" alignment="justified" fli="216" lsp="exactly" lspExact="262" language="en">

<ln l="6302" t="7459" r="10483" b="7656" baseLine="7603" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6302" t="7464" r="6518" b="7613">At</wd>

<space/>

<wd l="6562" t="7459" r="6821" b="7613">the</wd>

<space/>

<wd l="6869" t="7459" r="7243" b="7613">time</wd>

<space/>

<wd l="7301" t="7459" r="7483" b="7613">of</wd>

<space/>

<wd l="7517" t="7459" r="7776" b="7613">the</wd>

<space/>

<wd l="7834" t="7459" r="8376" b="7613">shared</wd>

<space/>

<wd l="8424" t="7459" r="8808" b="7642">task,</wd>

<space/>

<wd l="8866" t="7464" r="8933" b="7608">I</wd>

<space/>

<wd l="8990" t="7459" r="9821" b="7656">compared</wd>

<space/>

<wd l="9869" t="7459" r="10166" b="7613">this</wd>

<space/>

<wd l="10229" t="7507" r="10483" b="7656">ap-</wd>

</ln>

<ln l="6091" t="7718" r="10483" b="7915" baseLine="7867" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6091" t="7718" r="6667" b="7915">proach</wd>

<space/>

<wd l="6715" t="7742" r="6874" b="7872">to</wd>

<space/>

<wd l="6926" t="7766" r="7018" b="7872">a</wd>

<space/>

<wd l="7070" t="7718" r="7699" b="7915">simpler</wd>

<space/>

<wd l="7747" t="7718" r="8558" b="7915">approach,</wd>

<space/>

<wd l="8616" t="7718" r="8779" b="7867">in</wd>

<space/>

<wd l="8827" t="7718" r="9384" b="7901">which,</wd>

<space/>

<wd l="9442" t="7718" r="9691" b="7872">for</wd>

<space/>

<wd l="9739" t="7766" r="10032" b="7915">any</wd>

<space/>

<wd l="10085" t="7718" r="10483" b="7915">orig-</wd>

</ln>

<ln l="6091" t="7982" r="10483" b="8179" baseLine="8131" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6091" t="7982" r="6408" b="8136">inal</wd>

<space/>

<wd l="6490" t="7982" r="6998" b="8165">token,</wd>

<space/>

<wd l="7094" t="7982" r="7349" b="8136">the</wd>

<space/>

<wd l="7440" t="8006" r="8026" b="8179">system</wd>

<space/>

<wd l="8098" t="7982" r="8746" b="8179">ignored</wd>

<space/>

<wd l="8827" t="7982" r="9082" b="8136">the</wd>

<space/>

<wd l="9168" t="8006" r="9787" b="8136">context</wd>

<space/>

<wd l="9864" t="7982" r="10166" b="8136">and</wd>

<space/>

<wd l="10253" t="8030" r="10483" b="8136">se-</wd>

</ln>

<ln l="6091" t="8246" r="10493" b="8443" baseLine="8390" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6091" t="8246" r="6600" b="8400">lected</wd>

<space/>

<wd l="6667" t="8246" r="6922" b="8400">the</wd>

<space/>

<wd l="6994" t="8246" r="8170" b="8400">normalization</wd>

<space/>

<wd l="8242" t="8246" r="8616" b="8400">with</wd>

<space/>

<wd l="8683" t="8246" r="8942" b="8400">the</wd>

<space/>

<wd l="9019" t="8270" r="9672" b="8443">greatest</wd>

<space/>

<wd l="9744" t="8246" r="10493" b="8400">emission</wd>

<space/>

</ln>

<ln l="6101" t="8510" r="10483" b="8698" baseLine="8654" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6101" t="8558" r="6538" b="8664">score</wd>

<space/>

<wd l="6634" t="8510" r="7445" b="8698">(emission</wd>

<space/>

<wd l="7541" t="8558" r="7973" b="8664">score</wd>

<space/>

<wd l="8069" t="8558" r="8232" b="8664">as</wd>

<space/>

<wd l="8328" t="8510" r="8947" b="8664">defined</wd>

<space/>

<wd l="9034" t="8510" r="9638" b="8698">above).</wd>

<space/>

<wd l="9816" t="8515" r="10027" b="8664">At</wd>

<space/>

<wd l="10114" t="8510" r="10483" b="8693">first,</wd>

<space/>

</ln>

<ln l="6091" t="8765" r="10478" b="8966" baseLine="8918">

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="6091" t="8770" r="6350" b="8923">the</wd>

<space/>

<wd l="6427" t="8770" r="7013" b="8923">Viterbi</wd>

<space/>

<wd l="7090" t="8770" r="7728" b="8923">method</wd>

<space/>

<wd l="7810" t="8770" r="8309" b="8923">added</wd>

<space/>

<wd l="8405" t="8774" r="8592" b="8923">10</wd>

<space/>

<wd l="8674" t="8794" r="9586" b="8966">percentage</wd>

<space/>

<wd l="9667" t="8770" r="10176" b="8966">points</wd>

<space/>

</run>

<wd l="10272" t="8765" r="10478" b="8966"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">f</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">1</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><space/>

</run>

</ln>

<ln l="6096" t="9034" r="10498" b="9216" baseLine="9182" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6096" t="9082" r="6466" b="9187">over</wd>

<space/>

<wd l="6514" t="9034" r="6816" b="9187">this</wd>

<space/>

<wd l="6874" t="9034" r="7546" b="9187">method.</wd>

<space/>

<wd l="7627" t="9038" r="8424" b="9216">However,</wd>

<space/>

<wd l="8491" t="9034" r="8875" b="9187">after</wd>

<space/>

<wd l="8923" t="9034" r="9182" b="9187">the</wd>

<space/>

<wd l="9235" t="9034" r="9490" b="9187">the</wd>

<space/>

<wd l="9557" t="9034" r="10099" b="9187">shared</wd>

<space/>

<wd l="10152" t="9034" r="10498" b="9187">task</wd>

<space/>

</ln>

<ln l="6091" t="9298" r="10493" b="9494" baseLine="9442" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6091" t="9346" r="6413" b="9451">was</wd>

<space/>

<wd l="6475" t="9298" r="7186" b="9480">finished,</wd>

<space/>

<wd l="7248" t="9302" r="7315" b="9446">I</wd>

<space/>

<wd l="7373" t="9298" r="8280" b="9451">discovered</wd>

<space/>

<wd l="8333" t="9298" r="8654" b="9451">that</wd>

<space/>

<wd l="8702" t="9298" r="8957" b="9451">the</wd>

<space/>

<wd l="9014" t="9298" r="10493" b="9494">greatest-emission</wd>

<space/>

</ln>

<ln l="6091" t="9562" r="10483" b="9758" baseLine="9706" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6091" t="9562" r="6725" b="9715">method</wd>

<space/>

<wd l="6778" t="9562" r="7080" b="9715">had</wd>

<space/>

<wd l="7133" t="9610" r="7325" b="9715">an</wd>

<space/>

<wd l="7382" t="9610" r="7814" b="9715">error.</wd>

<space/>

<wd l="7896" t="9562" r="8510" b="9758">Having</wd>

<space/>

<wd l="8568" t="9562" r="8981" b="9715">fixed</wd>

<space/>

<wd l="9034" t="9562" r="9355" b="9715">that</wd>

<space/>

<wd l="9403" t="9610" r="9845" b="9744">error,</wd>

<space/>

<wd l="9907" t="9562" r="10210" b="9715">and</wd>

<space/>

<wd l="10258" t="9610" r="10483" b="9715">re-</wd>

</ln>

<ln l="6091" t="9811" r="10493" b="10018" baseLine="9970" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6091" t="9821" r="6744" b="10018">running</wd>

<space/>

<wd l="6797" t="9821" r="7056" b="9974">the</wd>

<space/>

<wd l="7114" t="9845" r="7694" b="10018">system</wd>

<space/>

<wd l="7742" t="9869" r="7949" b="9974">on</wd>

<space/>

<wd l="7997" t="9821" r="8256" b="9974">the</wd>

<space/>

<wd l="8304" t="9821" r="8957" b="10018">training</wd>

<space/>

<wd l="9014" t="9821" r="9408" b="10003">data,</wd>

<space/>

<wd l="9466" t="9826" r="9533" b="9970">I</wd>

<space/>

<wd l="9586" t="9821" r="10493" b="9974">discovered</wd>

<space/>

</ln>

<ln l="6091" t="10085" r="10483" b="10282" baseLine="10234" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6091" t="10085" r="6413" b="10238">that</wd>

<space/>

<wd l="6466" t="10085" r="6763" b="10238">this</wd>

<space/>

<wd l="6830" t="10085" r="8304" b="10282">greatest-emission</wd>

<space/>

<wd l="8362" t="10085" r="8731" b="10267">rule,</wd>

<space/>

<wd l="8798" t="10133" r="9010" b="10238">on</wd>

<space/>

<wd l="9062" t="10085" r="9317" b="10238">the</wd>

<space/>

<wd l="9374" t="10085" r="10027" b="10282">training</wd>

<space/>

<wd l="10090" t="10085" r="10483" b="10267">data,</wd>

<space/>

</ln>

<ln l="6096" t="10349" r="10493" b="10546" baseLine="10493" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6096" t="10349" r="6523" b="10546">gives</wd>

<space/>

<wd l="6590" t="10349" r="7075" b="10502">better</wd>

<space/>

<wd l="7138" t="10349" r="7685" b="10502">results</wd>

<space/>

<wd l="7752" t="10349" r="8112" b="10502">than</wd>

<space/>

<wd l="8179" t="10349" r="8434" b="10502">the</wd>

<space/>

<wd l="8496" t="10349" r="9082" b="10502">Viterbi</wd>

<space/>

<wd l="9154" t="10373" r="9739" b="10546">system</wd>

<space/>

<wd l="9797" t="10349" r="10186" b="10502">used</wd>

<space/>

<wd l="10243" t="10349" r="10493" b="10502">for</wd>

<space/>

</ln>

<ln l="6091" t="10608" r="10493" b="10810" baseLine="10757">

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="6091" t="10613" r="6350" b="10766">the</wd>

<space/>

<wd l="6422" t="10613" r="6965" b="10766">shared</wd>

<space/>

<wd l="7022" t="10613" r="7411" b="10766">task:</wd>

<space/>

<wd l="7507" t="10613" r="7752" b="10766">for</wd>

<space/>

</run>

<wd l="7824" t="10608" r="8030" b="10810"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">f</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">1</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="8117" t="10661" r="8678" b="10795">scores,</wd>

<space/>

<wd l="8750" t="10613" r="9005" b="10766">the</wd>

<space/>

<wd l="9067" t="10613" r="9653" b="10766">Viterbi</wd>

<space/>

<wd l="9720" t="10613" r="10493" b="10810">approach</wd>

<space/>

</run>

</ln>

<ln l="6096" t="10877" r="10478" b="11074" baseLine="11021">

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="6096" t="10901" r="6427" b="11074">gets</wd>

<space/>

<wd l="6485" t="10925" r="6576" b="11030">a</wd>

<space/>

</run>

<wd l="6634" t="10882" r="6989" b="11030"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">.</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">768</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="7042" t="10877" r="7507" b="11030">while</wd>

<space/>

<wd l="7555" t="10877" r="7810" b="11030">the</wd>

<space/>

<wd l="7862" t="10877" r="9341" b="11074">greatest-emission</wd>

<space/>

<wd l="9394" t="10925" r="9830" b="11030">score</wd>

<space/>

<wd l="9878" t="10877" r="10013" b="11030">is</wd>

<space/>

</run>

<wd l="10080" t="10882" r="10478" b="11030"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">.</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">816.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><space/>

</run>

</ln>

<ln l="6091" t="11136" r="10483" b="11333" baseLine="11285" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6091" t="11141" r="6499" b="11290">Note</wd>

<space/>

<wd l="6581" t="11136" r="6946" b="11318">that,</wd>

<space/>

<wd l="7037" t="11136" r="7286" b="11290">for</wd>

<space/>

<wd l="7363" t="11136" r="7622" b="11290">the</wd>

<space/>

<wd l="7704" t="11136" r="8290" b="11290">Viterbi</wd>

<space/>

<wd l="8371" t="11136" r="9187" b="11333">approach,</wd>

<space/>

<wd l="9283" t="11136" r="9538" b="11290">the</wd>

<space/>

<wd l="9619" t="11160" r="9917" b="11290">test</wd>

<space/>

<wd l="10003" t="11184" r="10483" b="11318">score,</wd>

<space/>

</ln>

<ln l="6106" t="11400" r="10493" b="11597" baseLine="11544">

<wd l="6106" t="11405" r="6514" b="11582"><run italic="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">.</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">757,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="6610" t="11400" r="6744" b="11554">is</wd>

<space/>

<wd l="6835" t="11424" r="7109" b="11554">not</wd>

<space/>

<wd l="7186" t="11400" r="7656" b="11554">much</wd>

<space/>

<wd l="7738" t="11400" r="8050" b="11554">less</wd>

<space/>

<wd l="8136" t="11400" r="8496" b="11554">than</wd>

<space/>

<wd l="8582" t="11400" r="8842" b="11554">the</wd>

<space/>

<wd l="8923" t="11400" r="9576" b="11597">training</wd>

<space/>

<wd l="9672" t="11448" r="10147" b="11554">score.</wd>

<space/>

<wd l="10315" t="11405" r="10493" b="11549">In</wd>

<space/>

</run>

</ln>

<ln l="6101" t="11664" r="10483" b="11861" baseLine="11808" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6101" t="11712" r="6907" b="11861">summary,</wd>

<space/>

<wd l="6984" t="11669" r="7210" b="11851">(1)</wd>

<space/>

<wd l="7272" t="11664" r="7526" b="11818">the</wd>

<space/>

<wd l="7584" t="11664" r="8170" b="11818">Viterbi</wd>

<space/>

<wd l="8227" t="11664" r="9043" b="11861">approach,</wd>

<space/>

<wd l="9110" t="11712" r="9274" b="11818">as</wd>

<space/>

<wd l="9336" t="11664" r="10483" b="11861">implemented,</wd>

<space/>

</ln>

<ln l="6091" t="11928" r="10483" b="12125" baseLine="12072" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6091" t="11928" r="6230" b="12082">is</wd>

<space/>

<wd l="6293" t="11928" r="7037" b="12125">probably</wd>

<space/>

<wd l="7099" t="11952" r="7378" b="12082">not</wd>

<space/>

<wd l="7430" t="11928" r="7685" b="12082">the</wd>

<space/>

<wd l="7747" t="11928" r="8131" b="12110">best,</wd>

<space/>

<wd l="8203" t="11928" r="8506" b="12082">and</wd>

<space/>

<wd l="8573" t="11933" r="8803" b="12115">(2)</wd>

<space/>

<wd l="8866" t="11928" r="9125" b="12082">the</wd>

<space/>

<wd l="9187" t="11928" r="9763" b="12082">overall</wd>

<space/>

<wd l="9821" t="11928" r="10483" b="12082">normal-</wd>

</ln>

<ln l="6091" t="12187" r="10488" b="12384" baseLine="12336" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6091" t="12187" r="6667" b="12341">ization</wd>

<space/>

<wd l="6730" t="12187" r="7498" b="12384">approach</wd>

<space/>

<wd l="7555" t="12192" r="7622" b="12336">I</wd>

<space/>

<wd l="7685" t="12187" r="8381" b="12341">describe</wd>

<space/>

<wd l="8438" t="12187" r="8602" b="12336">in</wd>

<space/>

<wd l="8659" t="12187" r="8957" b="12341">this</wd>

<space/>

<wd l="9019" t="12235" r="9490" b="12384">paper</wd>

<space/>

<wd l="9542" t="12187" r="9682" b="12341">is</wd>

<space/>

<wd l="9739" t="12187" r="10488" b="12384">probably</wd>

<space/>

</ln>

<ln l="6091" t="12451" r="9634" b="12648" baseLine="12595" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6091" t="12451" r="6576" b="12605">better</wd>

<space/>

<wd l="6629" t="12451" r="6989" b="12605">than</wd>

<space/>

<wd l="7046" t="12451" r="7301" b="12605">the</wd>

<space/>

<wd l="7368" t="12451" r="7910" b="12605">shared</wd>

<space/>

<wd l="7963" t="12451" r="8309" b="12605">task</wd>

<space/>

<wd l="8357" t="12451" r="8904" b="12605">results</wd>

<space/>

<wd l="8971" t="12475" r="9634" b="12648">suggest.</wd>

</ln>

</para>

<para l="6091" t="12902" r="8496" b="13114" alignment="left" spaceBefore="205" lsp="exactly" lspExact="267" language="en">

<ln l="6091" t="12902" r="8496" b="13114" baseLine="13061" bold="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="8">

<wd l="6091" t="12902" r="6202" b="13070">3</wd>

<space/>

<wd l="6446" t="12907" r="7435" b="13070">Resources</wd>

<space/>

<wd l="7498" t="12907" r="8496" b="13114">Employed</wd>

</ln>

</para>

<para l="6091" t="13325" r="10488" b="14006" alignment="justified" spaceBefore="141" lsp="exactly" lspExact="263" language="en">

<ln l="6091" t="13325" r="10488" b="13522" baseLine="13474" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6091" t="13325" r="6418" b="13478">The</wd>

<space/>

<wd l="6475" t="13325" r="7603" b="13522">computations</wd>

<space/>

<wd l="7656" t="13325" r="7906" b="13478">for</wd>

<space/>

<wd l="7954" t="13325" r="8606" b="13522">training</wd>

<space/>

<wd l="8664" t="13325" r="8962" b="13478">and</wd>

<space/>

<wd l="9010" t="13325" r="9571" b="13522">testing</wd>

<space/>

<wd l="9624" t="13373" r="10032" b="13478">were</wd>

<space/>

<wd l="10085" t="13325" r="10488" b="13478">done</wd>

<space/>

</ln>

<ln l="6096" t="13589" r="10488" b="13786" baseLine="13733" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6096" t="13637" r="6302" b="13742">on</wd>

<space/>

<wd l="6384" t="13637" r="6475" b="13742">a</wd>

<space/>

<wd l="6547" t="13589" r="7416" b="13742">MacBook.</wd>

<space/>

<wd l="7560" t="13589" r="8333" b="13786">Required</wd>

<space/>

<wd l="8414" t="13589" r="9619" b="13786">computational</wd>

<space/>

<wd l="9691" t="13637" r="10488" b="13742">resources</wd>

<space/>

</ln>

<ln l="6091" t="13853" r="9898" b="14006" baseLine="13997" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6091" t="13901" r="6499" b="14006">were</wd>

<space/>

<wd l="6557" t="13853" r="7301" b="14006">minimal.</wd>

<space/>

<wd l="7382" t="13858" r="7781" b="14006">Data</wd>

<space/>

<wd l="7834" t="13901" r="8626" b="14006">resources</wd>

<space/>

<wd l="8688" t="13901" r="8942" b="14006">are</wd>

<space/>

<wd l="9000" t="13901" r="9168" b="14006">as</wd>

<space/>

<wd l="9226" t="13853" r="9898" b="14006">follows:</wd>

</ln>

</para>

<para l="6091" t="14294" r="10483" b="14755" alignment="justified" li="216" spaceBefore="179" spaceAfter="1" fli="-216" lsp="exactly" lspExact="263" language="en">

<ln l="6091" t="14294" r="10483" b="14491" baseLine="14438">

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6091" t="14299" r="6283" b="14448">A.</wd>

<space/>

</run>

<run bold="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6403" t="14294" r="6898" b="14448">CMU</wd>

<space/>

<wd l="7042" t="14294" r="8189" b="14491">pronouncing</wd>

<space/>

<wd l="8338" t="14294" r="9317" b="14491">dictionary:</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="9581" t="14299" r="9869" b="14448">“an</wd>

<space/>

<wd l="10018" t="14342" r="10483" b="14491">open-</wd>

</run>

</ln>

<ln l="6403" t="14558" r="10483" b="14755" baseLine="14702">

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6403" t="14606" r="6946" b="14712">source</wd>

<space/>

<wd l="7051" t="14558" r="8554" b="14712">machine-readable</wd>

<space/>

<wd l="8659" t="14558" r="9835" b="14755">pronunciation</wd>

<space/>

<wd l="9946" t="14558" r="10483" b="14712">dictio-</wd>

</run>

<run fontFace="Times New Roman" fontFamily="roman" fontPitch="variable"><nl orig="true"/>

</run>

</ln>

</para>

</column>

</section>

<dd l="5776" t="15746" r="6181" b="15975">

<para l="5809" t="15792" r="6148" b="15946" alignment="left" lsp="exactly" lspExact="223" language="en">

<ln l="5875" t="15792" r="6082" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="950" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="29">

<wd l="5875" t="15792" r="6082" b="15946">84</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4312.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1405" marginTop="1440" marginRight="1396" marginBottom="1302" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1405" t="2120" r="10513" b="4257">

<column l="1405" t="2120" r="10513" b="4257">

<table l="4046" t="2131" r="7810" b="3658" alignment="left" li="2641" ri="2703" spaceBefore="11" spaceAfter="17">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<gridTable>

<gridCol>1234</gridCol>

<gridCol>960</gridCol>

<gridCol>763</gridCol>

<gridCol>807</gridCol>

<gridRow>283</gridRow>

<gridRow>240</gridRow>

<gridRow>240</gridRow>

<gridRow>240</gridRow>

<gridRow>240</gridRow>

<gridRow>284</gridRow>

</gridTable>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="4210" t="2208" r="5146" b="2347" alignment="left" li="164" spaceAfter="16" lsp="exactly" lspExact="234" language="en">

<ln l="4210" t="2208" r="5146" b="2347" baseLine="2338" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="4210" t="2213" r="4637" b="2347">Team</wd>

<space/>

<wd l="4680" t="2213" r="5146" b="2347">Name</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="5414" t="2208" r="6120" b="2386" alignment="centered" spaceAfter="16" lsp="exactly" lspExact="234" language="en">

<ln l="5414" t="2208" r="6120" b="2386" baseLine="2338" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="5414" t="2208" r="6120" b="2386">precision</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="6408" t="2208" r="6835" b="2347" alignment="centered" spaceAfter="16" lsp="exactly" lspExact="234" language="en">

<ln l="6408" t="2208" r="6835" b="2347" baseLine="2338" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6408" t="2208" r="6830" b="2347">recall</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="7306" t="2208" r="7445" b="2342" alignment="centered" spaceAfter="16" lsp="exactly" lspExact="234" language="en">

<ln l="7306" t="2208" r="7445" b="2342" baseLine="2338" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-9">

<wd l="7306" t="2208" r="7445" b="2342">f1</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="4210" t="2448" r="4853" b="2587" alignment="left" li="164" spaceAfter="21" lsp="exactly" lspExact="214" language="en">

<ln l="4210" t="2448" r="4853" b="2587" baseLine="2578" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="4210" t="2448" r="4507" b="2587">IHS</wd>

<space/>

<wd l="4589" t="2453" r="4853" b="2582">RD</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="5501" t="2448" r="6029" b="2587" alignment="left" spaceAfter="21" lsp="exactly" lspExact="214" language="en">

<tabs position="5501"/>

<ln l="5501" t="2448" r="6029" b="2587" baseLine="2578" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="5501" t="2448" r="6029" b="2587">0.8469</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="6355" t="2448" r="6878" b="2587" alignment="left" spaceAfter="21" lsp="exactly" lspExact="214" language="en">

<tabs position="6355"/>

<ln l="6355" t="2448" r="6878" b="2587" baseLine="2578" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6355" t="2448" r="6878" b="2587">0.8083</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="7118" t="2448" r="7651" b="2587" alignment="left" spaceAfter="21" lsp="exactly" lspExact="214" language="en">

<tabs position="7118"/>

<ln l="7118" t="2448" r="7651" b="2587" baseLine="2578" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="7118" t="2448" r="7651" b="2587">0.8272</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="4210" t="2688" r="5083" b="2827" alignment="left" li="164" spaceAfter="12" lsp="exactly" lspExact="214" language="en">

<ln l="4210" t="2688" r="5083" b="2827" baseLine="2818" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="4210" t="2688" r="5083" b="2827">USZEGED</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="5501" t="2688" r="6029" b="2827" alignment="left" spaceAfter="12" lsp="exactly" lspExact="214" language="en">

<tabs position="5501"/>

<ln l="5501" t="2688" r="6029" b="2827" baseLine="2818" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="5501" t="2688" r="6029" b="2827">0.8606</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="6355" t="2688" r="6888" b="2827" alignment="left" spaceAfter="12" lsp="exactly" lspExact="214" language="en">

<tabs position="6355"/>

<ln l="6355" t="2688" r="6888" b="2827" baseLine="2818" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6355" t="2688" r="6888" b="2827">0.7564</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="7118" t="2688" r="7651" b="2827" alignment="left" spaceAfter="12" lsp="exactly" lspExact="214" language="en">

<tabs position="7118"/>

<ln l="7118" t="2688" r="7651" b="2827" baseLine="2818" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="7118" t="2688" r="7651" b="2827">0.8052</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="4210" t="2928" r="4594" b="3067" alignment="left" li="164" spaceAfter="16" lsp="exactly" lspExact="214" language="en">

<ln l="4210" t="2928" r="4594" b="3067" baseLine="3058" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="4210" t="2928" r="4594" b="3067">bekli</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="5501" t="2928" r="6034" b="3067" alignment="left" spaceAfter="16" lsp="exactly" lspExact="214" language="en">

<tabs position="5501"/>

<ln l="5501" t="2928" r="6034" b="3067" baseLine="3058" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="5501" t="2928" r="6034" b="3067">0.7732</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="6355" t="2928" r="6883" b="3067" alignment="left" spaceAfter="16" lsp="exactly" lspExact="214" language="en">

<tabs position="6355"/>

<ln l="6355" t="2928" r="6883" b="3067" baseLine="3058" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6355" t="2928" r="6883" b="3067">0.7416</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="7118" t="2928" r="7632" b="3067" alignment="left" spaceAfter="16" lsp="exactly" lspExact="214" language="en">

<tabs position="7118"/>

<ln l="7118" t="2928" r="7632" b="3067" baseLine="3058" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="7118" t="2928" r="7632" b="3067">0.7571</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="4214" t="3168" r="4546" b="3346" alignment="left" li="164" spaceAfter="21" lsp="exactly" lspExact="214" language="en">

<ln l="4214" t="3168" r="4546" b="3346" baseLine="3298" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="4214" t="3168" r="4546" b="3346">gigo</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="5501" t="3168" r="6024" b="3307" alignment="left" spaceAfter="21" lsp="exactly" lspExact="214" language="en">

<tabs position="5501"/>

<ln l="5501" t="3168" r="6024" b="3307" baseLine="3298" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="5501" t="3168" r="6024" b="3307">0.7593</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="6355" t="3168" r="6878" b="3307" alignment="left" spaceAfter="21" lsp="exactly" lspExact="214" language="en">

<tabs position="6355"/>

<ln l="6355" t="3168" r="6878" b="3307" baseLine="3298" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6355" t="3168" r="6878" b="3307">0.6963</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="7118" t="3168" r="7651" b="3307" alignment="left" spaceAfter="21" lsp="exactly" lspExact="214" language="en">

<tabs position="7118"/>

<ln l="7118" t="3168" r="7651" b="3307" baseLine="3298" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="7118" t="3168" r="7651" b="3307">0.7264</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="5" gridRowTill="5" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="4210" t="3408" r="4877" b="3586" alignment="left" li="164" spaceAfter="69" lsp="exactly" lspExact="214" language="en">

<ln l="4210" t="3408" r="4877" b="3586" baseLine="3538" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="4210" t="3408" r="4877" b="3586">lysgroup</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="5" gridRowTill="5" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="5501" t="3408" r="6034" b="3547" alignment="left" spaceAfter="69" lsp="exactly" lspExact="214" language="en">

<tabs position="5501"/>

<ln l="5501" t="3408" r="6034" b="3547" baseLine="3538" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="5501" t="3408" r="6034" b="3547">0.4592</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="5" gridRowTill="5" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="6355" t="3408" r="6883" b="3547" alignment="left" spaceAfter="69" lsp="exactly" lspExact="214" language="en">

<tabs position="6355"/>

<ln l="6355" t="3408" r="6883" b="3547" baseLine="3538" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6355" t="3408" r="6883" b="3547">0.6296</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="5" gridRowTill="5" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="7166" t="3408" r="7584" b="3547" alignment="left" spaceAfter="69" lsp="exactly" lspExact="214" language="en">

<tabs position="7166"/>

<ln l="7166" t="3408" r="7584" b="3547" baseLine="3538" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="7166" t="3408" r="7584" b="3547">0.531</wd>

</ln>

</para>

</cell>

</table>

<para l="4042" t="3730" r="7858" b="3869" alignment="centered" spaceAfter="339" lsp="exactly" lspExact="234" language="en">

<ln l="4042" t="3730" r="7858" b="3869" baseLine="3859" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="4042" t="3730" r="4464" b="3869">Table</wd>

<space/>

<wd l="4531" t="3730" r="4646" b="3869">1:</wd>

<space/>

<wd l="4723" t="3734" r="5150" b="3869">Team</wd>

<space/>

<wd l="5198" t="3730" r="5760" b="3869">Results</wd>

<space/>

<wd l="5818" t="3730" r="6043" b="3869">for</wd>

<space/>

<wd l="6091" t="3730" r="6326" b="3869">the</wd>

<space/>

<wd l="6374" t="3730" r="7459" b="3869">unconstrained</wd>

<space/>

<wd l="7507" t="3730" r="7858" b="3869">task.</wd>

</ln>

</para>

</column>

</section>

<section l="1405" t="4257" r="10513" b="14772">

<column l="1405" t="4257" r="5840" b="14772">

<para l="1714" t="4315" r="5808" b="4776" alignment="justified" li="288" spaceBefore="29" lsp="exactly" lspExact="264" language="en">

<ln l="1714" t="4315" r="5808" b="4512" baseLine="4459" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1714" t="4363" r="2088" b="4512">nary</wd>

<space/>

<wd l="2170" t="4315" r="2419" b="4469">for</wd>

<space/>

<wd l="2496" t="4315" r="2990" b="4469">North</wd>

<space/>

<wd l="3072" t="4315" r="3902" b="4469">American</wd>

<space/>

<wd l="3984" t="4315" r="4632" b="4512">English</wd>

<space/>

<wd l="4709" t="4315" r="5030" b="4469">that</wd>

<space/>

<wd l="5112" t="4315" r="5808" b="4469">contains</wd>

<space/>

</ln>

<ln l="1718" t="4546" r="5664" b="4776" baseLine="4719">

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1718" t="4627" r="2088" b="4733">over</wd>

<space/>

<wd l="2155" t="4584" r="2822" b="4762">134,000</wd>

<space/>

<wd l="2880" t="4579" r="3389" b="4733">words</wd>

<space/>

<wd l="3451" t="4579" r="3754" b="4733">and</wd>

<space/>

<wd l="3806" t="4579" r="4195" b="4733">their</wd>

<space/>

</run>

<wd l="4248" t="4546" r="5664" b="4776"><run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">pronunciations”</run>

<run underlined="none" subsuperscript="superscript" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">3</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

<para l="1426" t="5035" r="5813" b="5712" alignment="justified" li="288" spaceBefore="201" fli="-288" lsp="exactly" lspExact="257" language="en">

<ln l="1426" t="5035" r="5803" b="5232" baseLine="5179">

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1426" t="5040" r="1603" b="5189">B.</wd>

<space/>

</run>

<run bold="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1718" t="5035" r="2842" b="5189">count_1w.txt</wd>

<space/>

<wd l="2894" t="5035" r="3331" b="5189">from</wd>

<space/>

<wd l="3389" t="5040" r="3869" b="5189">Peter</wd>

<space/>

</run>

<wd l="3917" t="5035" r="4579" b="5232"><run bold="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Norvig</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">:</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="4666" t="5035" r="5083" b="5189">“The</wd>

<space/>

<wd l="5155" t="5040" r="5400" b="5189">1/3</wd>

<space/>

<wd l="5462" t="5035" r="5803" b="5184">mil-</wd>

</run>

</ln>

<ln l="1714" t="5294" r="5813" b="5491" baseLine="5443" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1714" t="5294" r="2045" b="5448">lion</wd>

<space/>

<wd l="2141" t="5318" r="2558" b="5448">most</wd>

<space/>

<wd l="2650" t="5294" r="3360" b="5491">frequent</wd>

<space/>

<wd l="3451" t="5294" r="4008" b="5477">words,</wd>

<space/>

<wd l="4133" t="5294" r="4339" b="5448">all</wd>

<space/>

<wd l="4435" t="5294" r="5318" b="5477">lowercase,</wd>

<space/>

<wd l="5438" t="5294" r="5813" b="5448">with</wd>

<space/>

</ln>

<ln l="1718" t="5525" r="2530" b="5712" baseLine="5688">

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1718" t="5563" r="2390" b="5712">counts.”</wd>

<space/>

</run>

<run underlined="none" subsuperscript="superscript" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2453" t="5525" r="2530" b="5630">4</wd>

</run>

</ln>

</para>

<para l="1430" t="6014" r="5803" b="6475" alignment="justified" li="288" spaceBefore="166" fli="-288" lsp="exactly" lspExact="264" language="en">

<ln l="1430" t="6014" r="5803" b="6211" baseLine="6158">

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1430" t="6019" r="1603" b="6168">C.</wd>

<space/>

</run>

<run bold="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1723" t="6014" r="1920" b="6168">10</wd>

<space/>

<wd l="1997" t="6014" r="2630" b="6168">million</wd>

<space/>

<wd l="2702" t="6029" r="3259" b="6168">tweets</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3341" t="6014" r="4099" b="6168">collected</wd>

<space/>

<wd l="4171" t="6014" r="4378" b="6211">by</wd>

<space/>

<wd l="4459" t="6019" r="5021" b="6168">Steven</wd>

<space/>

<wd l="5088" t="6014" r="5803" b="6197">Bedrick,</wd>

<space/>

</run>

</ln>

<ln l="1718" t="6278" r="5309" b="6475" baseLine="6422" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1718" t="6278" r="1901" b="6432">of</wd>

<space/>

<wd l="1949" t="6283" r="2578" b="6475">Oregon</wd>

<space/>

<wd l="2630" t="6278" r="3192" b="6432">Health</wd>

<space/>

<wd l="3250" t="6278" r="3552" b="6432">and</wd>

<space/>

<wd l="3610" t="6278" r="4339" b="6432">Sciences</wd>

<space/>

<wd l="4397" t="6278" r="5309" b="6475">University.</wd>

</ln>

</para>

<para l="1411" t="6696" r="5578" b="6888" alignment="left" spaceBefore="225" lsp="exactly" lspExact="264" language="en">

<ln l="1411" t="6696" r="5578" b="6888" baseLine="6874">

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="1411" t="6739" r="1603" b="6888">D.</wd>

<space/>

</run>

<run bold="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="1714" t="6739" r="2371" b="6888">WNUT</wd>

<space/>

<wd l="2429" t="6734" r="3082" b="6888">Lexical</wd>

<space/>

<wd l="3139" t="6734" r="4392" b="6888">normalisation</wd>

<space/>

</run>

<wd l="4454" t="6696" r="5578" b="6888"><run bold="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">dictionaries</run>

<run underlined="none" subsuperscript="superscript" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">5</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><nl orig="true"/>

</run>

</ln>

</para>

<para l="1733" t="7138" r="2731" b="7291" alignment="left" li="288" spaceBefore="107" lsp="exactly" lspExact="264" language="en">

<bullet type="ordered" numChars="3">

</bullet>

<ln l="1733" t="7138" r="2731" b="7291" baseLine="7282" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-10">

<wd l="1733" t="7142" r="1973" b="7282">a.</wd>

<space/>

<wd l="1973" t="7138" r="2731" b="7291">UniMelb</wd>

</ln>

</para>

<para l="1723" t="7445" r="2789" b="7598" alignment="left" li="288" spaceBefore="43" lsp="exactly" lspExact="264" language="en">

<bullet type="ordered" numChars="3">

</bullet>

<ln l="1723" t="7445" r="2789" b="7598" baseLine="7589" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-8">

<wd l="1723" t="7445" r="1973" b="7589">b.</wd>

<space/>

<wd l="1973" t="7445" r="2789" b="7598">UTDallas</wd>

</ln>

</para>

<para l="1435" t="7862" r="3586" b="8098" alignment="left" spaceBefore="222" lsp="exactly" lspExact="264" language="en">

<ln l="1435" t="7862" r="3586" b="8098" baseLine="8038">

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="1435" t="7906" r="1603" b="8054">E.</wd>

<space/>

</run>

<run bold="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="1714" t="7906" r="2371" b="8054">WNUT</wd>

<space/>

<wd l="2429" t="7901" r="3154" b="8098">training</wd>

<space/>

</run>

<wd l="3216" t="7862" r="3586" b="8054"><run bold="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">set</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">.</run>

<run underlined="none" subsuperscript="superscript" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">6</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><nl orig="true"/>

</run>

</ln>

</para>

<para l="1464" t="8352" r="5803" b="8813" alignment="justified" li="288" spaceBefore="194" fli="-288" lsp="exactly" lspExact="264" language="en">

<ln l="1464" t="8352" r="5803" b="8549" baseLine="8501">

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1464" t="8357" r="1603" b="8506">F.</wd>

<space/>

</run>

<run bold="true" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1714" t="8357" r="2299" b="8506">Urban</wd>

<space/>

<wd l="2419" t="8352" r="3442" b="8549">Dictionary:</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3658" t="8400" r="3749" b="8506">a</wd>

<space/>

<wd l="3864" t="8352" r="4397" b="8549">highly</wd>

<space/>

<wd l="4522" t="8352" r="5275" b="8506">inclusive</wd>

<space/>

<wd l="5395" t="8400" r="5803" b="8506">user-</wd>

</run>

</ln>

<ln l="1718" t="8582" r="4142" b="8813" baseLine="8759">

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1718" t="8616" r="2539" b="8813">generated</wd>

<space/>

<wd l="2597" t="8616" r="3115" b="8770">online</wd>

<space/>

</run>

<wd l="3178" t="8582" r="4142" b="8813"><run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">dictionary.</run>

<run underlined="none" subsuperscript="superscript" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">7</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

<para l="1411" t="9082" r="2477" b="9250" alignment="left" spaceBefore="181" lsp="exactly" lspExact="264" language="en">

<ln l="1411" t="9082" r="2477" b="9250" baseLine="9240" bold="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="17">

<wd l="1411" t="9082" r="1522" b="9245">4</wd>

<space/>

<wd l="1766" t="9086" r="2477" b="9250">Results</wd>

</ln>

</para>

<para l="1411" t="9514" r="5813" b="11275" alignment="justified" spaceBefore="147" lsp="exactly" lspExact="264" language="en">

<ln l="1411" t="9514" r="5798" b="9667" baseLine="9658" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="9514" r="1877" b="9667">Table</wd>

<space/>

<wd l="1949" t="9518" r="2016" b="9662">1</wd>

<space/>

<wd l="2098" t="9514" r="2606" b="9667">shows</wd>

<space/>

<wd l="2659" t="9514" r="2918" b="9667">the</wd>

<space/>

<wd l="2971" t="9514" r="3518" b="9667">results</wd>

<space/>

<wd l="3576" t="9514" r="3821" b="9667">for</wd>

<space/>

<wd l="3874" t="9514" r="4128" b="9667">the</wd>

<space/>

<wd l="4181" t="9514" r="5366" b="9667">unconstrained</wd>

<space/>

<wd l="5419" t="9514" r="5798" b="9667">task.</wd>

<space/>

</ln>

<ln l="1411" t="9778" r="5813" b="9974" baseLine="9922" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="9778" r="1738" b="9931">The</wd>

<space/>

<wd l="1805" t="9778" r="2395" b="9974">project</wd>

<space/>

<wd l="2458" t="9778" r="3264" b="9931">described</wd>

<space/>

<wd l="3326" t="9778" r="3494" b="9926">in</wd>

<space/>

<wd l="3557" t="9778" r="3811" b="9931">the</wd>

<space/>

<wd l="3878" t="9802" r="4493" b="9974">present</wd>

<space/>

<wd l="4550" t="9778" r="4987" b="9931">work</wd>

<space/>

<wd l="5045" t="9778" r="5179" b="9931">is</wd>

<space/>

<wd l="5251" t="9778" r="5813" b="9931">named</wd>

<space/>

</ln>

<ln l="1416" t="10037" r="5808" b="10234" baseLine="10186" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="10037" r="2064" b="10190">“bekli”.</wd>

<space/>

<wd l="2208" t="10037" r="2534" b="10190">The</wd>

<space/>

<wd l="2616" t="10037" r="3269" b="10234">training</wd>

<space/>

<wd l="3360" t="10061" r="3586" b="10190">set</wd>

<space/>

<wd l="3662" t="10037" r="4445" b="10190">consisted</wd>

<space/>

<wd l="4526" t="10037" r="4709" b="10190">of</wd>

<space/>

<wd l="4781" t="10042" r="5194" b="10190">2024</wd>

<space/>

<wd l="5270" t="10061" r="5808" b="10190">tweets</wd>

<space/>

</ln>

<ln l="1411" t="10301" r="5803" b="10454" baseLine="10445" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="10301" r="1790" b="10454">with</wd>

<space/>

<wd l="1872" t="10349" r="1963" b="10454">a</wd>

<space/>

<wd l="2040" t="10301" r="2414" b="10454">total</wd>

<space/>

<wd l="2496" t="10301" r="2678" b="10454">of</wd>

<space/>

<wd l="2750" t="10306" r="3158" b="10454">3928</wd>

<space/>

<wd l="3245" t="10301" r="3792" b="10454">tokens</wd>

<space/>

<wd l="3874" t="10301" r="4195" b="10454">that</wd>

<space/>

<wd l="4267" t="10301" r="4867" b="10454">needed</wd>

<space/>

<wd l="4944" t="10325" r="5107" b="10454">to</wd>

<space/>

<wd l="5189" t="10301" r="5386" b="10454">be</wd>

<space/>

<wd l="5462" t="10349" r="5803" b="10454">nor-</wd>

</ln>

<ln l="1411" t="10565" r="5813" b="10718" baseLine="10709" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="10565" r="2122" b="10718">malized.</wd>

<space/>

<wd l="2227" t="10565" r="2554" b="10718">The</wd>

<space/>

<wd l="2616" t="10589" r="2914" b="10718">test</wd>

<space/>

<wd l="2981" t="10589" r="3206" b="10718">set</wd>

<space/>

<wd l="3269" t="10565" r="4051" b="10718">consisted</wd>

<space/>

<wd l="4118" t="10565" r="4301" b="10718">of</wd>

<space/>

<wd l="4373" t="10565" r="4766" b="10718">1967</wd>

<space/>

<wd l="4834" t="10589" r="5371" b="10718">tweets</wd>

<space/>

<wd l="5438" t="10565" r="5813" b="10718">with</wd>

<space/>

</ln>

<ln l="1416" t="10829" r="5813" b="10982" baseLine="10973" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="10877" r="1507" b="10982">a</wd>

<space/>

<wd l="1570" t="10829" r="1944" b="10982">total</wd>

<space/>

<wd l="2011" t="10829" r="2194" b="10982">of</wd>

<space/>

<wd l="2246" t="10834" r="2654" b="10982">2738</wd>

<space/>

<wd l="2726" t="10829" r="3269" b="10982">tokens</wd>

<space/>

<wd l="3336" t="10829" r="3658" b="10982">that</wd>

<space/>

<wd l="3715" t="10829" r="4315" b="10982">needed</wd>

<space/>

<wd l="4378" t="10853" r="4536" b="10982">to</wd>

<space/>

<wd l="4598" t="10829" r="4795" b="10982">be</wd>

<space/>

<wd l="4862" t="10829" r="5813" b="10982">normalized</wd>

<space/>

</ln>

<ln l="1411" t="11088" r="3250" b="11275" baseLine="11237" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="11088" r="2131" b="11242">Baldwin</wd>

<space/>

<wd l="2189" t="11112" r="2338" b="11242">et</wd>

<space/>

<wd l="2395" t="11088" r="2582" b="11242">al.</wd>

<space/>

<wd l="2659" t="11088" r="3250" b="11275">(2015).</wd>

</ln>

</para>

<para l="1416" t="11554" r="2870" b="11722" alignment="left" spaceBefore="213" lsp="exactly" lspExact="264" language="en">

<ln l="1416" t="11554" r="2870" b="11722" baseLine="11712" bold="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="14">

<wd l="1416" t="11558" r="1522" b="11722">5</wd>

<space/>

<wd l="1771" t="11554" r="2870" b="11722">Conclusion</wd>

</ln>

</para>

<para l="1411" t="11986" r="5813" b="13498" alignment="justified" spaceBefore="147" spaceAfter="119" lsp="exactly" lspExact="264" language="en">

<ln l="1411" t="11986" r="5813" b="12182" baseLine="12130" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="11986" r="1738" b="12139">The</wd>

<space/>

<wd l="1786" t="11986" r="2333" b="12139">results</wd>

<space/>

<wd l="2390" t="11986" r="2822" b="12139">show</wd>

<space/>

<wd l="2870" t="11986" r="3187" b="12139">that</wd>

<space/>

<wd l="3235" t="12034" r="3326" b="12139">a</wd>

<space/>

<wd l="3379" t="11986" r="3931" b="12182">simple</wd>

<space/>

<wd l="3984" t="12010" r="4637" b="12182">strategy</wd>

<space/>

<wd l="4685" t="11986" r="5064" b="12139">with</wd>

<space/>

<wd l="5107" t="11986" r="5813" b="12139">minimal</wd>

<space/>

</ln>

<ln l="1416" t="12250" r="5803" b="12446" baseLine="12394" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="12250" r="2621" b="12446">computational</wd>

<space/>

<wd l="2688" t="12298" r="3480" b="12403">resources</wd>

<space/>

<wd l="3552" t="12298" r="3835" b="12403">can</wd>

<space/>

<wd l="3907" t="12298" r="4109" b="12446">go</wd>

<space/>

<wd l="4181" t="12250" r="4642" b="12446">along</wd>

<space/>

<wd l="4709" t="12298" r="5083" b="12446">way.</wd>

<space/>

<wd l="5194" t="12254" r="5486" b="12403">For</wd>

<space/>

<wd l="5549" t="12298" r="5803" b="12403">ex-</wd>

</ln>

<ln l="1416" t="12509" r="5813" b="12706" baseLine="12658" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="12509" r="1973" b="12706">ample,</wd>

<space/>

<wd l="2040" t="12509" r="2294" b="12662">the</wd>

<space/>

<wd l="2362" t="12557" r="2822" b="12706">space</wd>

<space/>

<wd l="2880" t="12509" r="3586" b="12706">required</wd>

<space/>

<wd l="3638" t="12509" r="3888" b="12662">for</wd>

<space/>

<wd l="3941" t="12509" r="4200" b="12662">the</wd>

<space/>

<wd l="4258" t="12509" r="4517" b="12662">list</wd>

<space/>

<wd l="4574" t="12509" r="4877" b="12662">and</wd>

<space/>

<wd l="4930" t="12509" r="5813" b="12662">rule-based</wd>

<space/>

</ln>

<ln l="1416" t="12773" r="5803" b="12970" baseLine="12917" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="12797" r="2429" b="12970">components</wd>

<space/>

<wd l="2491" t="12773" r="2626" b="12926">is</wd>

<space/>

<wd l="2688" t="12773" r="3571" b="12970">negligible.</wd>

<space/>

<wd l="3662" t="12773" r="3989" b="12926">The</wd>

<space/>

<wd l="4051" t="12773" r="4416" b="12970">only</wd>

<space/>

<wd l="4483" t="12773" r="5150" b="12926">element</wd>

<space/>

<wd l="5203" t="12773" r="5525" b="12926">that</wd>

<space/>

<wd l="5578" t="12821" r="5803" b="12926">re-</wd>

</ln>

<ln l="1416" t="13037" r="5803" b="13234" baseLine="13181" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="13037" r="1925" b="13234">quires</wd>

<space/>

<wd l="2021" t="13085" r="2458" b="13190">some</wd>

<space/>

<wd l="2544" t="13037" r="3038" b="13234">heavy</wd>

<space/>

<wd l="3130" t="13037" r="3638" b="13234">lifting</wd>

<space/>

<wd l="3730" t="13037" r="3864" b="13190">is</wd>

<space/>

<wd l="3950" t="13037" r="4210" b="13190">the</wd>

<space/>

<wd l="4301" t="13037" r="5496" b="13190">sentence-level</wd>

<space/>

<wd l="5578" t="13085" r="5803" b="13190">re-</wd>

</ln>

<ln l="1411" t="13301" r="4378" b="13498" baseLine="13445" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="13301" r="1954" b="13454">ranker</wd>

<space/>

<wd l="2006" t="13301" r="2381" b="13454">with</wd>

<space/>

<wd l="2434" t="13301" r="2630" b="13454">its</wd>

<space/>

<wd l="2688" t="13301" r="3058" b="13498">long</wd>

<space/>

<wd l="3115" t="13301" r="3379" b="13454">list</wd>

<space/>

<wd l="3432" t="13301" r="3614" b="13454">of</wd>

<space/>

<wd l="3658" t="13301" r="4378" b="13498">bigrams.</wd>

</ln>

</para>

<rulerline l="1405" t="13632" r="2578" b="13632" type="single" width="10" color="000000"/>

<para l="1661" t="13685" r="5122" b="14746" alignment="left" li="288" ri="648" spaceBefore="48" spaceAfter="11" lsp="exactly" lspExact="215" language="en">

<ln l="1666" t="13685" r="5006" b="13877" baseLine="13836">

<wd l="1666" t="13685" r="5006" b="13877"><run underlined="none" subsuperscript="superscript" fontSize="600" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">3</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">http://www.speech.cs.cmu.edu/cgi-bin/cmudict</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><space/>

</run>

</ln>

<ln l="1661" t="13901" r="4430" b="14093" baseLine="14052">

<wd l="1661" t="13901" r="4430" b="14093"><run underlined="none" subsuperscript="superscript" fontSize="600" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">4</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">http://norvig.com/ngrams/count_1w.txt</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><space/>

</run>

</ln>

<ln l="1661" t="14117" r="5122" b="14314" baseLine="14268">

<wd l="1661" t="14117" r="5122" b="14314"><run underlined="none" subsuperscript="superscript" fontSize="600" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">5</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">http://noisy-text.github.io/norm-shared-task.html</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><space/>

</run>

</ln>

<ln l="1661" t="14333" r="5122" b="14530" baseLine="14484">

<wd l="1661" t="14333" r="5122" b="14530"><run underlined="none" subsuperscript="superscript" fontSize="600" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">6</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">http://noisy-text.github.io/norm-shared-task.html</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><space/>

</run>

</ln>

<ln l="1661" t="14554" r="4037" b="14746" baseLine="14699">

<wd l="1661" t="14554" r="4037" b="14746"><run underlined="none" subsuperscript="superscript" fontSize="600" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">7</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">http://www.urbandictionary.com/</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><nl orig="true"/>

</run>

</ln>

</para>

</column>

<column l="6078" t="4257" r="10513" b="14772">

<para l="6091" t="4310" r="10498" b="6614" alignment="justified" fli="216" lsp="exactly" lspExact="262" language="en">

<ln l="6302" t="4310" r="10483" b="4512" baseLine="4459" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6302" t="4320" r="7099" b="4498">However,</wd>

<space/>

<wd l="7195" t="4363" r="7358" b="4469">as</wd>

<space/>

<wd l="7440" t="4320" r="7507" b="4464">I</wd>

<space/>

<wd l="7589" t="4315" r="8395" b="4469">described</wd>

<space/>

<wd l="8472" t="4315" r="9010" b="4498">above,</wd>

<space/>

<wd l="9110" t="4315" r="9845" b="4512">selecting</wd>

<space/>

<wd l="9926" t="4315" r="10181" b="4469">the</wd>

<space/>

<wd l="10258" t="4339" r="10483" b="4469">to-</wd>

</ln>

<ln l="6091" t="4570" r="10488" b="4776" baseLine="4723" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="4579" r="6394" b="4733">ken</wd>

<space/>

<wd l="6470" t="4579" r="6850" b="4733">with</wd>

<space/>

<wd l="6926" t="4579" r="7181" b="4733">the</wd>

<space/>

<wd l="7262" t="4579" r="8741" b="4776">greatest-emission</wd>

<space/>

<wd l="8818" t="4579" r="9739" b="4776">probability</wd>

<space/>

<wd l="9826" t="4579" r="10488" b="4776">actually</wd>

<space/>

</ln>

<ln l="6091" t="4829" r="10488" b="5035" baseLine="4987" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="4838" r="6600" b="4992">works</wd>

<space/>

<wd l="6658" t="4838" r="7142" b="4992">better</wd>

<space/>

<wd l="7190" t="4838" r="7550" b="4992">than</wd>

<space/>

<wd l="7603" t="4886" r="7872" b="5035">my</wd>

<space/>

<wd l="7925" t="4838" r="8530" b="5035">bigram</wd>

<space/>

<wd l="8582" t="4838" r="9394" b="5035">approach,</wd>

<space/>

<wd l="9461" t="4838" r="9758" b="4992">and</wd>

<space/>

<wd l="9811" t="4838" r="10488" b="5035">requires</wd>

<space/>

</ln>

<ln l="6091" t="5093" r="10498" b="5299" baseLine="5246" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="5102" r="6326" b="5256">far</wd>

<space/>

<wd l="6379" t="5102" r="6691" b="5256">less</wd>

<space/>

<wd l="6754" t="5102" r="7843" b="5299">computation.</wd>

<space/>

<wd l="7925" t="5102" r="8294" b="5256">This</wd>

<space/>

<wd l="8352" t="5102" r="8870" b="5256">leaves</wd>

<space/>

<wd l="8928" t="5102" r="9182" b="5256">the</wd>

<space/>

<wd l="9245" t="5102" r="9998" b="5299">question:</wd>

<space/>

<wd l="10080" t="5102" r="10498" b="5256">what</wd>

<space/>

</ln>

<ln l="6091" t="5357" r="10483" b="5563" baseLine="5510" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="5366" r="6638" b="5520">results</wd>

<space/>

<wd l="6734" t="5366" r="7200" b="5520">could</wd>

<space/>

<wd l="7282" t="5366" r="7478" b="5520">be</wd>

<space/>

<wd l="7570" t="5366" r="8309" b="5520">achieved</wd>

<space/>

<wd l="8390" t="5366" r="8846" b="5563">using</wd>

<space/>

<wd l="8938" t="5414" r="9029" b="5520">a</wd>

<space/>

<wd l="9110" t="5366" r="9595" b="5520">better</wd>

<space/>

<wd l="9677" t="5366" r="10483" b="5549">re-ranker,</wd>

<space/>

</ln>

<ln l="6096" t="5621" r="10483" b="5827" baseLine="5774" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6096" t="5678" r="6394" b="5784">one</wd>

<space/>

<wd l="6475" t="5630" r="6797" b="5784">that</wd>

<space/>

<wd l="6883" t="5630" r="7901" b="5827">successfully</wd>

<space/>

<wd l="7987" t="5630" r="8645" b="5827">exploits</wd>

<space/>

<wd l="8736" t="5635" r="9442" b="5784">context?</wd>

<space/>

<wd l="9590" t="5630" r="10008" b="5784">Such</wd>

<space/>

<wd l="10090" t="5678" r="10181" b="5784">a</wd>

<space/>

<wd l="10258" t="5678" r="10483" b="5784">re-</wd>

</ln>

<ln l="6091" t="5880" r="10483" b="6086" baseLine="6038" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="5890" r="6634" b="6043">ranker</wd>

<space/>

<wd l="6701" t="5890" r="7272" b="6072">would,</wd>

<space/>

<wd l="7363" t="5938" r="7930" b="6086">among</wd>

<space/>

<wd l="8011" t="5890" r="8443" b="6043">other</wd>

<space/>

<wd l="8515" t="5890" r="9216" b="6072">benefits,</wd>

<space/>

<wd l="9302" t="5890" r="9758" b="6043">make</wd>

<space/>

<wd l="9830" t="5890" r="9955" b="6043">it</wd>

<space/>

<wd l="10022" t="5890" r="10483" b="6043">feasi-</wd>

</ln>

<ln l="6091" t="6144" r="10498" b="6336" baseLine="6298" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="6154" r="6350" b="6307">ble</wd>

<space/>

<wd l="6403" t="6178" r="6566" b="6307">to</wd>

<space/>

<wd l="6624" t="6202" r="6902" b="6307">use</wd>

<space/>

<wd l="6960" t="6154" r="7368" b="6307">rules</wd>

<space/>

<wd l="7430" t="6202" r="7603" b="6307">or</wd>

<space/>

<wd l="7666" t="6154" r="8722" b="6307">substitutions</wd>

<space/>

<wd l="8784" t="6154" r="9106" b="6307">that</wd>

<space/>

<wd l="9158" t="6202" r="9408" b="6307">are</wd>

<space/>

<wd l="9466" t="6178" r="9782" b="6336">not,</wd>

<space/>

<wd l="9845" t="6154" r="10498" b="6307">without</wd>

<space/>

</ln>

<ln l="6091" t="6408" r="9499" b="6614" baseLine="6562" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="6418" r="6547" b="6614">using</wd>

<space/>

<wd l="6610" t="6442" r="7267" b="6600">context,</wd>

<space/>

<wd l="7334" t="6418" r="7973" b="6614">capable</wd>

<space/>

<wd l="8035" t="6418" r="8218" b="6571">of</wd>

<space/>

<wd l="8256" t="6418" r="8635" b="6614">high</wd>

<space/>

<wd l="8688" t="6418" r="9499" b="6614">precision.</wd>

</ln>

</para>

<para l="6091" t="6691" r="10502" b="8198" alignment="justified" spaceBefore="4" fli="216" lsp="exactly" lspExact="264" language="en">

<ln l="6302" t="6691" r="10493" b="6888" baseLine="6835" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6302" t="6691" r="6998" b="6845">Another</wd>

<space/>

<wd l="7080" t="6691" r="7790" b="6888">question</wd>

<space/>

<wd l="7872" t="6691" r="8722" b="6888">remaining</wd>

<space/>

<wd l="8808" t="6691" r="8942" b="6845">is</wd>

<space/>

<wd l="9024" t="6691" r="9379" b="6845">how</wd>

<space/>

<wd l="9461" t="6691" r="9931" b="6845">much</wd>

<space/>

<wd l="10013" t="6691" r="10493" b="6845">better</wd>

<space/>

</ln>

<ln l="6096" t="6950" r="10502" b="7147" baseLine="7099" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6096" t="6998" r="6379" b="7104">can</wd>

<space/>

<wd l="6485" t="6998" r="6730" b="7104">we</wd>

<space/>

<wd l="6835" t="6950" r="7037" b="7104">do</wd>

<space/>

<wd l="7142" t="6950" r="7349" b="7147">by</wd>

<space/>

<wd l="7459" t="6950" r="8328" b="7147">expanding</wd>

<space/>

<wd l="8434" t="6950" r="8688" b="7104">the</wd>

<space/>

<wd l="8798" t="6950" r="9413" b="7104">curated</wd>

<space/>

<wd l="9523" t="6974" r="10219" b="7147">segment</wd>

<space/>

<wd l="10320" t="6950" r="10502" b="7104">of</wd>

<space/>

</ln>

<ln l="6091" t="7214" r="10488" b="7411" baseLine="7358" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="7214" r="6350" b="7368">the</wd>

<space/>

<wd l="6418" t="7214" r="7032" b="7368">list—if</wd>

<space/>

<wd l="7085" t="7262" r="7378" b="7397">we,</wd>

<space/>

<wd l="7459" t="7214" r="7709" b="7368">for</wd>

<space/>

<wd l="7776" t="7214" r="8530" b="7411">example,</wd>

<space/>

<wd l="8616" t="7214" r="9187" b="7368">double</wd>

<space/>

<wd l="9254" t="7214" r="9514" b="7368">the</wd>

<space/>

<wd l="9590" t="7214" r="10003" b="7368">size?</wd>

<space/>

<wd l="10118" t="7214" r="10488" b="7368">This</wd>

<space/>

</ln>

<ln l="6091" t="7478" r="10488" b="7675" baseLine="7622" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="7478" r="6619" b="7632">would</wd>

<space/>

<wd l="6701" t="7478" r="7229" b="7632">would</wd>

<space/>

<wd l="7320" t="7478" r="7618" b="7632">still</wd>

<space/>

<wd l="7714" t="7478" r="8170" b="7632">allow</wd>

<space/>

<wd l="8256" t="7526" r="8347" b="7632">a</wd>

<space/>

<wd l="8429" t="7526" r="9149" b="7675">program</wd>

<space/>

<wd l="9226" t="7502" r="9384" b="7632">to</wd>

<space/>

<wd l="9470" t="7478" r="9859" b="7632">have</wd>

<space/>

<wd l="9950" t="7526" r="10042" b="7632">a</wd>

<space/>

<wd l="10118" t="7526" r="10488" b="7675">very</wd>

<space/>

</ln>

<ln l="6101" t="7742" r="10488" b="7939" baseLine="7886" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6101" t="7742" r="6552" b="7896">small</wd>

<space/>

<wd l="6610" t="7742" r="7814" b="7939">computational</wd>

<space/>

<wd l="7867" t="7742" r="8534" b="7939">imprint,</wd>

<space/>

<wd l="8597" t="7742" r="9062" b="7896">while</wd>

<space/>

<wd l="9125" t="7742" r="9595" b="7939">doing</wd>

<space/>

<wd l="9653" t="7742" r="10176" b="7939">nearly</wd>

<space/>

<wd l="10234" t="7742" r="10488" b="7896">the</wd>

<space/>

</ln>

<ln l="6091" t="8002" r="9226" b="8198" baseLine="8150" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="8002" r="6528" b="8155">work</wd>

<space/>

<wd l="6581" t="8002" r="6763" b="8155">of</wd>

<space/>

<wd l="6811" t="8050" r="6902" b="8155">a</wd>

<space/>

<wd l="6955" t="8050" r="7387" b="8155">more</wd>

<space/>

<wd l="7450" t="8002" r="8549" b="8198">sophisticated</wd>

<space/>

<wd l="8611" t="8026" r="9226" b="8198">system.</wd>

</ln>

</para>

<para l="6096" t="8693" r="7166" b="8861" alignment="left" spaceBefore="444" lsp="exactly" lspExact="264" language="en">

<ln l="6096" t="8693" r="7166" b="8861" baseLine="8856" bold="true" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6096" t="8693" r="7166" b="8861">References</wd>

</ln>

</para>

<para l="6091" t="9106" r="10493" b="10675" alignment="justified" li="216" spaceBefore="123" fli="-216" lsp="exactly" lspExact="234" language="en">

<ln l="6091" t="9106" r="10483" b="9269" baseLine="9240" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6091" t="9106" r="6643" b="9245">Akshat</wd>

<space/>

<wd l="6706" t="9106" r="7440" b="9269">Bakliwal,</wd>

<space/>

<wd l="7517" t="9106" r="8146" b="9245">Jennifer</wd>

<space/>

<wd l="8203" t="9110" r="8717" b="9269">Foster,</wd>

<space/>

<wd l="8794" t="9106" r="9422" b="9245">Jennifer</wd>

<space/>

<wd l="9480" t="9149" r="9749" b="9245">van</wd>

<space/>

<wd l="9826" t="9106" r="10070" b="9245">der</wd>

<space/>

<wd l="10133" t="9106" r="10483" b="9269">Puil,</wd>

<space/>

</ln>

<ln l="6302" t="9341" r="10483" b="9518" baseLine="9470" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6302" t="9346" r="6624" b="9480">Ron</wd>

<space/>

<wd l="6677" t="9341" r="7344" b="9504">O’Brien,</wd>

<space/>

<wd l="7402" t="9341" r="7896" b="9480">Lamia</wd>

<space/>

<wd l="7939" t="9341" r="8501" b="9504">Tounsi,</wd>

<space/>

<wd l="8563" t="9341" r="8837" b="9480">and</wd>

<space/>

<wd l="8885" t="9341" r="9307" b="9480">Mark</wd>

<space/>

<wd l="9350" t="9341" r="9979" b="9518">Hughes.</wd>

<space/>

<wd l="10061" t="9341" r="10483" b="9480">2013.</wd>

<space/>

</ln>

<ln l="6307" t="9571" r="10483" b="9749" baseLine="9701" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6307" t="9571" r="7090" b="9710">Sentiment</wd>

<space/>

<wd l="7142" t="9571" r="7752" b="9749">analysis</wd>

<space/>

<wd l="7814" t="9571" r="7987" b="9710">of</wd>

<space/>

<wd l="8021" t="9571" r="8654" b="9749">political</wd>

<space/>

<wd l="8707" t="9590" r="9240" b="9710">tweets:</wd>

<space/>

<wd l="9317" t="9571" r="9965" b="9710">Towards</wd>

<space/>

<wd l="10027" t="9614" r="10200" b="9710">an</wd>

<space/>

<wd l="10262" t="9614" r="10483" b="9710">ac-</wd>

</ln>

<ln l="6307" t="9802" r="10493" b="9979" baseLine="9936">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="6307" t="9821" r="6778" b="9941">curate</wd>

<space/>

<wd l="6850" t="9802" r="7555" b="9941">classifier.</wd>

<space/>

<wd l="7694" t="9806" r="7858" b="9936">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="7925" t="9802" r="8875" b="9979">Proceedings</wd>

<space/>

<wd l="8952" t="9802" r="9130" b="9979">of</wd>

<space/>

<wd l="9168" t="9802" r="9394" b="9941">the</wd>

<space/>

<wd l="9475" t="9802" r="10229" b="9979">Workshop</wd>

<space/>

<wd l="10306" t="9850" r="10493" b="9941">on</wd>

<space/>

</run>

</ln>

<ln l="6298" t="10032" r="10488" b="10210" baseLine="10166">

<run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="6298" t="10037" r="7070" b="10210">Language</wd>

<space/>

<wd l="7109" t="10032" r="7776" b="10210">Analysis</wd>

<space/>

<wd l="7838" t="10037" r="7982" b="10171">in</wd>

<space/>

<wd l="8030" t="10032" r="8520" b="10171">Social</wd>

<space/>

</run>

<wd l="8563" t="10032" r="9101" b="10195"><run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">Media</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><space/>

<wd l="9158" t="10075" r="9590" b="10210">pages</wd>

<space/>

<wd l="9653" t="10032" r="10171" b="10195">49–58,</wd>

<space/>

<wd l="10234" t="10032" r="10488" b="10171">At-</wd>

</run>

</ln>

<ln l="6302" t="10267" r="10493" b="10445" baseLine="10397" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6302" t="10267" r="6720" b="10430">lanta,</wd>

<space/>

<wd l="6821" t="10267" r="7474" b="10445">Georgia,</wd>

<space/>

<wd l="7570" t="10272" r="7958" b="10406">June.</wd>

<space/>

<wd l="8050" t="10267" r="8962" b="10406">Association</wd>

<space/>

<wd l="9038" t="10267" r="9269" b="10406">for</wd>

<space/>

<wd l="9350" t="10267" r="10493" b="10445">Computational</wd>

<space/>

</ln>

<ln l="6302" t="10498" r="7200" b="10675" baseLine="10627" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6302" t="10498" r="7200" b="10675">Linguistics.</wd>

</ln>

</para>

<para l="6091" t="10762" r="10493" b="12331" alignment="justified" li="216" spaceBefore="18" fli="-216" lsp="exactly" lspExact="234" language="en">

<ln l="6091" t="10762" r="10483" b="10939" baseLine="10896" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="10762" r="6749" b="10939">Timothy</wd>

<space/>

<wd l="6792" t="10762" r="7483" b="10925">Baldwin,</wd>

<space/>

<wd l="7536" t="10762" r="7997" b="10901">Marie</wd>

<space/>

<wd l="8045" t="10762" r="8789" b="10901">Catherine</wd>

<space/>

<wd l="8832" t="10762" r="9014" b="10901">de</wd>

<space/>

<wd l="9053" t="10762" r="9806" b="10925">Marneffe,</wd>

<space/>

<wd l="9854" t="10766" r="10075" b="10901">Bo</wd>

<space/>

<wd l="10123" t="10766" r="10483" b="10925">Han,</wd>

<space/>

</ln>

<ln l="6302" t="10997" r="10483" b="11174" baseLine="11126" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6302" t="11002" r="7253" b="11174">Young-Bum</wd>

<space/>

<wd l="7334" t="10997" r="7714" b="11160">Kim,</wd>

<space/>

<wd l="7814" t="10997" r="8184" b="11136">Alan</wd>

<space/>

<wd l="8270" t="10997" r="8746" b="11160">Ritter,</wd>

<space/>

<wd l="8851" t="10997" r="9125" b="11136">and</wd>

<space/>

<wd l="9206" t="10997" r="9509" b="11136">Wei</wd>

<space/>

<wd l="9595" t="11002" r="9869" b="11136">Xu.</wd>

<space/>

<wd l="10061" t="10997" r="10483" b="11136">2015.</wd>

<space/>

</ln>

<ln l="6307" t="11227" r="10483" b="11405" baseLine="11357" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6307" t="11227" r="6840" b="11366">Shared</wd>

<space/>

<wd l="6931" t="11227" r="7310" b="11366">tasks</wd>

<space/>

<wd l="7416" t="11227" r="7584" b="11366">of</wd>

<space/>

<wd l="7661" t="11227" r="7896" b="11366">the</wd>

<space/>

<wd l="7992" t="11227" r="8362" b="11366">2015</wd>

<space/>

<wd l="8467" t="11227" r="9221" b="11405">workshop</wd>

<space/>

<wd l="9322" t="11270" r="9509" b="11366">on</wd>

<space/>

<wd l="9600" t="11227" r="10018" b="11405">noisy</wd>

<space/>

<wd l="10109" t="11270" r="10483" b="11366">user-</wd>

</ln>

<ln l="6307" t="11458" r="10493" b="11635" baseLine="11592" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6307" t="11458" r="7056" b="11635">generated</wd>

<space/>

<wd l="7176" t="11477" r="7498" b="11597">text:</wd>

<space/>

<wd l="7718" t="11458" r="8275" b="11597">Twitter</wd>

<space/>

<wd l="8390" t="11458" r="8899" b="11597">lexical</wd>

<space/>

<wd l="9019" t="11458" r="10094" b="11597">normalization</wd>

<space/>

<wd l="10219" t="11458" r="10493" b="11597">and</wd>

<space/>

</ln>

<ln l="6302" t="11688" r="10488" b="11866" baseLine="11822">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6302" t="11688" r="6821" b="11827">named</wd>

<space/>

<wd l="6869" t="11688" r="7301" b="11866">entity</wd>

<space/>

<wd l="7349" t="11688" r="8266" b="11866">recognition.</wd>

<space/>

<wd l="8347" t="11693" r="8506" b="11822">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="8554" t="11688" r="9504" b="11866">Proceedings</wd>

<space/>

<wd l="9557" t="11688" r="9734" b="11866">of</wd>

<space/>

<wd l="9754" t="11688" r="9979" b="11827">the</wd>

<space/>

<wd l="10037" t="11688" r="10488" b="11827">Work-</wd>

</run>

</ln>

<ln l="6302" t="11923" r="10483" b="12101" baseLine="12053">

<run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6302" t="11923" r="6662" b="12101">shop</wd>

<space/>

<wd l="6768" t="11971" r="6955" b="12062">on</wd>

<space/>

<wd l="7046" t="11928" r="7493" b="12101">Noisy</wd>

<space/>

<wd l="7608" t="11923" r="8803" b="12101">User-generated</wd>

<space/>

<wd l="8899" t="11928" r="9206" b="12062">Text</wd>

<space/>

<wd l="9302" t="11928" r="9917" b="12096">(WNUT</wd>

<space/>

</run>

<wd l="9994" t="11923" r="10483" b="12096"><run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">2015)</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

</ln>

<ln l="6302" t="12154" r="7469" b="12331" baseLine="12283" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6302" t="12154" r="6912" b="12331">Beijing,</wd>

<space/>

<wd l="6974" t="12154" r="7469" b="12293">China.</wd>

</ln>

</para>

<para l="6091" t="12418" r="10488" b="13291" alignment="justified" li="216" spaceBefore="31" fli="-216" lsp="exactly" lspExact="234" language="en">

<ln l="6091" t="12418" r="10483" b="12595" baseLine="12552" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6091" t="12418" r="6720" b="12557">Luciano</wd>

<space/>

<wd l="6787" t="12418" r="7421" b="12557">Barbosa</wd>

<space/>

<wd l="7483" t="12418" r="7757" b="12557">and</wd>

<space/>

<wd l="7814" t="12418" r="8314" b="12557">Junlan</wd>

<space/>

<wd l="8381" t="12422" r="8803" b="12595">Feng.</wd>

<space/>

<wd l="8923" t="12418" r="9346" b="12557">2010.</wd>

<space/>

<wd l="9461" t="12418" r="10008" b="12557">Robust</wd>

<space/>

<wd l="10070" t="12418" r="10483" b="12557">senti-</wd>

</ln>

<ln l="6302" t="12653" r="10483" b="12830" baseLine="12782" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6302" t="12672" r="6696" b="12792">ment</wd>

<space/>

<wd l="6754" t="12653" r="7454" b="12792">detection</wd>

<space/>

<wd l="7517" t="12696" r="7704" b="12792">on</wd>

<space/>

<wd l="7766" t="12653" r="8270" b="12792">twitter</wd>

<space/>

<wd l="8328" t="12653" r="8707" b="12792">from</wd>

<space/>

<wd l="8760" t="12653" r="9250" b="12792">biased</wd>

<space/>

<wd l="9317" t="12653" r="9590" b="12792">and</wd>

<space/>

<wd l="9648" t="12653" r="10066" b="12830">noisy</wd>

<space/>

<wd l="10128" t="12653" r="10483" b="12792">data.</wd>

<space/>

</ln>

<ln l="6302" t="12883" r="10488" b="13061" baseLine="13013">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6302" t="12888" r="6466" b="13018">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6538" t="12883" r="7488" b="13061">Proceedings</wd>

<space/>

<wd l="7570" t="12883" r="7747" b="13061">of</wd>

<space/>

<wd l="7790" t="12883" r="8016" b="13022">the</wd>

<space/>

<wd l="8088" t="12883" r="8458" b="13022">23rd</wd>

<space/>

<wd l="8515" t="12883" r="9542" b="13022">International</wd>

<space/>

<wd l="9619" t="12883" r="10488" b="13061">Conference</wd>

<space/>

</run>

</ln>

<ln l="6307" t="13114" r="10397" b="13291" baseLine="13248">

<run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6307" t="13162" r="6494" b="13253">on</wd>

<space/>

<wd l="6557" t="13114" r="7709" b="13291">Computational</wd>

<space/>

<wd l="7747" t="13118" r="8654" b="13291">Linguistics:</wd>

<space/>

</run>

<wd l="8726" t="13118" r="9326" b="13277"><run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">Posters</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="9384" t="13157" r="9816" b="13291">pages</wd>

<space/>

<wd l="9878" t="13114" r="10397" b="13253">36–44.</wd>

</run>

</ln>

</para>

<para l="6096" t="13378" r="10502" b="14246" alignment="justified" li="216" spaceBefore="22" fli="-216" lsp="exactly" lspExact="234" language="en">

<ln l="6096" t="13378" r="10483" b="13555" baseLine="13512" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6096" t="13378" r="6706" b="13555">Cynthia</wd>

<space/>

<wd l="6797" t="13378" r="7234" b="13517">Chew</wd>

<space/>

<wd l="7334" t="13378" r="7608" b="13517">and</wd>

<space/>

<wd l="7704" t="13378" r="8333" b="13517">Gunther</wd>

<space/>

<wd l="8419" t="13378" r="9298" b="13555">Eysenbach.</wd>

<space/>

<wd l="9509" t="13378" r="9931" b="13517">2010.</wd>

<space/>

<wd l="10142" t="13382" r="10483" b="13517">Pan-</wd>

</ln>

<ln l="6307" t="13613" r="10502" b="13790" baseLine="13742" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6307" t="13613" r="6845" b="13752">demics</wd>

<space/>

<wd l="6955" t="13613" r="7109" b="13747">in</wd>

<space/>

<wd l="7214" t="13613" r="7450" b="13752">the</wd>

<space/>

<wd l="7560" t="13656" r="7824" b="13790">age</wd>

<space/>

<wd l="7934" t="13613" r="8102" b="13752">of</wd>

<space/>

<wd l="8194" t="13613" r="8741" b="13752">twitter:</wd>

<space/>

<wd l="8933" t="13632" r="9499" b="13752">content</wd>

<space/>

<wd l="9610" t="13613" r="10219" b="13790">analysis</wd>

<space/>

<wd l="10334" t="13613" r="10502" b="13752">of</wd>

<space/>

</ln>

<ln l="6302" t="13843" r="10483" b="14021" baseLine="13973">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6302" t="13862" r="6792" b="13982">tweets</wd>

<space/>

<wd l="6893" t="13843" r="7387" b="14021">during</wd>

<space/>

<wd l="7488" t="13843" r="7723" b="13982">the</wd>

<space/>

<wd l="7819" t="13843" r="8198" b="13982">2009</wd>

<space/>

<wd l="8294" t="13843" r="8659" b="13978">h1n1</wd>

<space/>

<wd l="8779" t="13843" r="9490" b="13982">outbreak.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="9706" t="13843" r="10075" b="13982">PloS</wd>

<space/>

</run>

<wd l="10171" t="13891" r="10483" b="14006"><run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">one</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

</ln>

<ln l="6307" t="14074" r="7382" b="14246" baseLine="14208" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6307" t="14074" r="7382" b="14246">5(11):e14118.</wd>

</ln>

</para>

<para l="6091" t="14338" r="10493" b="14750" alignment="justified" li="216" spaceBefore="26" spaceAfter="8" fli="-216" lsp="exactly" lspExact="234" language="en">

<ln l="6091" t="14338" r="10493" b="14515" baseLine="14472" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6091" t="14338" r="6720" b="14515">Monojit</wd>

<space/>

<wd l="6806" t="14338" r="7699" b="14515">Choudhury,</wd>

<space/>

<wd l="7805" t="14338" r="8266" b="14477">Rahul</wd>

<space/>

<wd l="8357" t="14338" r="8798" b="14501">Saraf,</wd>

<space/>

<wd l="8909" t="14338" r="9254" b="14515">Vijit</wd>

<space/>

<wd l="9336" t="14338" r="9686" b="14501">Jain,</wd>

<space/>

<wd l="9797" t="14338" r="10493" b="14477">Animesh</wd>

<space/>

</ln>

<ln l="6302" t="14573" r="10483" b="14750" baseLine="14702" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6302" t="14573" r="7181" b="14750">Mukherjee,</wd>

<space/>

<wd l="7334" t="14573" r="8069" b="14712">Sudeshna</wd>

<space/>

<wd l="8194" t="14573" r="8726" b="14736">Sarkar,</wd>

<space/>

<wd l="8875" t="14573" r="9149" b="14712">and</wd>

<space/>

<wd l="9274" t="14573" r="9941" b="14750">Anupam</wd>

<space/>

<wd l="10056" t="14578" r="10483" b="14712">Basu.</wd>

</ln>

</para>

</column>

</section>

<dd l="1405" t="15736" r="10513" b="15977">

<para l="5809" t="15787" r="6138" b="15946" alignment="centered" spaceBefore="4" lsp="exactly" lspExact="229" language="en">

<ln l="5875" t="15787" r="6072" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="36">

<wd l="5875" t="15787" r="6072" b="15946">85</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4312.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1411" marginTop="1440" marginRight="6078" marginBottom="858" offsetX="8" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1411" t="2160" r="5831" b="15980">

<column l="1411" t="2160" r="5831" b="15980">

<para l="1627" t="2213" r="5813" b="2846" alignment="justified" li="216" lsp="exactly" lspExact="232" language="en">

<ln l="1627" t="2213" r="5813" b="2390" baseLine="2342" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1627" t="2213" r="2050" b="2352">2007.</wd>

<space/>

<wd l="2270" t="2213" r="3269" b="2390">Investigation</wd>

<space/>

<wd l="3365" t="2213" r="3638" b="2352">and</wd>

<space/>

<wd l="3734" t="2213" r="4459" b="2390">modeling</wd>

<space/>

<wd l="4560" t="2213" r="4728" b="2352">of</wd>

<space/>

<wd l="4810" t="2213" r="5045" b="2352">the</wd>

<space/>

<wd l="5146" t="2232" r="5813" b="2352">structure</wd>

<space/>

</ln>

<ln l="1627" t="2443" r="5803" b="2621" baseLine="2578">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1627" t="2443" r="1800" b="2582">of</wd>

<space/>

<wd l="1882" t="2443" r="2410" b="2621">texting</wd>

<space/>

<wd l="2510" t="2443" r="3245" b="2621">language.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3470" t="2448" r="3720" b="2582">Int.</wd>

<space/>

<wd l="3830" t="2448" r="3941" b="2582">J.</wd>

<space/>

<wd l="4051" t="2448" r="4406" b="2582">Doc.</wd>

<space/>

<wd l="4507" t="2443" r="4915" b="2582">Anal.</wd>

<space/>

</run>

<wd l="5026" t="2448" r="5803" b="2621"><run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Recognit.</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

</ln>

<ln l="1642" t="2674" r="3691" b="2846" baseLine="2808" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="2674" r="2813" b="2846">10(3):157–174,</wd>

<space/>

<wd l="2870" t="2674" r="3691" b="2813">December.</wd>

</ln>

</para>

<para l="1411" t="2923" r="5818" b="4493" alignment="justified" li="216" spaceBefore="6" fli="-216" lsp="exactly" lspExact="233" language="en">

<ln l="1411" t="2923" r="5808" b="3101" baseLine="3053" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="2928" r="1632" b="3062">Bo</wd>

<space/>

<wd l="1704" t="2928" r="2021" b="3062">Han</wd>

<space/>

<wd l="2098" t="2923" r="2371" b="3062">and</wd>

<space/>

<wd l="2438" t="2923" r="3096" b="3101">Timothy</wd>

<space/>

<wd l="3163" t="2923" r="3859" b="3062">Baldwin.</wd>

<space/>

<wd l="3998" t="2923" r="4421" b="3062">2011.</wd>

<space/>

<wd l="4560" t="2923" r="5136" b="3062">Lexical</wd>

<space/>

<wd l="5203" t="2923" r="5808" b="3062">normal-</wd>

</ln>

<ln l="1622" t="3154" r="5803" b="3331" baseLine="3288" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1622" t="3154" r="2141" b="3293">isation</wd>

<space/>

<wd l="2208" t="3154" r="2381" b="3293">of</wd>

<space/>

<wd l="2438" t="3154" r="2818" b="3293">short</wd>

<space/>

<wd l="2880" t="3173" r="3173" b="3293">text</wd>

<space/>

<wd l="3235" t="3197" r="4003" b="3331">messages:</wd>

<space/>

<wd l="4109" t="3154" r="4560" b="3293">Makn</wd>

<space/>

<wd l="4637" t="3197" r="4949" b="3293">sens</wd>

<space/>

<wd l="5030" t="3197" r="5112" b="3293">a</wd>

<space/>

<wd l="5174" t="3154" r="5803" b="3293">#twitter.</wd>

<space/>

</ln>

<ln l="1622" t="3384" r="5808" b="3562" baseLine="3518">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1622" t="3389" r="1786" b="3518">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1843" t="3384" r="2798" b="3562">Proceedings</wd>

<space/>

<wd l="2861" t="3384" r="3038" b="3562">of</wd>

<space/>

<wd l="3067" t="3384" r="3293" b="3523">the</wd>

<space/>

<wd l="3355" t="3384" r="3696" b="3523">49th</wd>

<space/>

<wd l="3739" t="3384" r="4315" b="3523">Annual</wd>

<space/>

<wd l="4363" t="3389" r="5002" b="3562">Meeting</wd>

<space/>

<wd l="5064" t="3384" r="5242" b="3562">of</wd>

<space/>

<wd l="5270" t="3384" r="5496" b="3523">the</wd>

<space/>

<wd l="5539" t="3389" r="5808" b="3523">As-</wd>

</run>

</ln>

<ln l="1622" t="3619" r="5808" b="3797" baseLine="3749" italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1622" t="3624" r="2333" b="3758">sociation</wd>

<space/>

<wd l="2352" t="3619" r="2611" b="3797">for</wd>

<space/>

<wd l="2664" t="3619" r="3816" b="3797">Computational</wd>

<space/>

<wd l="3854" t="3624" r="4762" b="3797">Linguistics:</wd>

<space/>

<wd l="4829" t="3624" r="5400" b="3758">Human</wd>

<space/>

<wd l="5443" t="3624" r="5808" b="3758">Lan-</wd>

</ln>

<ln l="1622" t="3850" r="5818" b="4027" baseLine="3979">

<run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1622" t="3898" r="2088" b="4027">guage</wd>

<space/>

<wd l="2160" t="3850" r="3154" b="4027">Technologies</wd>

<space/>

<wd l="3221" t="3931" r="3269" b="3950">-</wd>

<space/>

<wd l="3346" t="3850" r="3898" b="3989">Volume</wd>

<space/>

</run>

<wd l="3965" t="3850" r="4090" b="4013"><run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">1</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="4157" t="3854" r="4512" b="3984">HLT</wd>

<space/>

<wd l="4584" t="3850" r="4867" b="4013">’11,</wd>

<space/>

<wd l="4930" t="3893" r="5362" b="4027">pages</wd>

<space/>

<wd l="5434" t="3850" r="5818" b="3989">368–</wd>

<space/>

</run>

</ln>

<ln l="1627" t="4080" r="5808" b="4258" baseLine="4214" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1627" t="4080" r="1954" b="4243">378,</wd>

<space/>

<wd l="2035" t="4080" r="3005" b="4258">Stroudsburg,</wd>

<space/>

<wd l="3082" t="4080" r="3346" b="4243">PA,</wd>

<space/>

<wd l="3422" t="4080" r="3845" b="4219">USA.</wd>

<space/>

<wd l="3917" t="4080" r="4829" b="4219">Association</wd>

<space/>

<wd l="4891" t="4080" r="5122" b="4219">for</wd>

<space/>

<wd l="5184" t="4080" r="5808" b="4258">Compu-</wd>

</ln>

<ln l="1622" t="4315" r="3149" b="4493" baseLine="4445" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1622" t="4315" r="2203" b="4454">tational</wd>

<space/>

<wd l="2251" t="4315" r="3149" b="4493">Linguistics.</wd>

</ln>

</para>

<para l="1416" t="4560" r="5813" b="5899" alignment="justified" li="216" spaceBefore="8" fli="-216" lsp="exactly" lspExact="233" language="en">

<ln l="1416" t="4560" r="5813" b="4738" baseLine="4690">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4"><wd l="1416" t="4560" r="2165" b="4699">Catherine</wd>

<space/>

<wd l="2342" t="4560" r="2870" b="4723">Kobus,</wd>

<space/>

</run>

<wd l="3082" t="4560" r="3744" b="4738"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">François</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4"><space/>

<wd l="3922" t="4565" r="4387" b="4723">Yvon,</wd>

<space/>

<wd l="4603" t="4560" r="4877" b="4699">and</wd>

<space/>

<wd l="5054" t="4560" r="5813" b="4699">Géraldine</wd>

<space/>

</run>

</ln>

<ln l="1622" t="4790" r="5803" b="4968" baseLine="4925" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="1622" t="4790" r="2328" b="4930">Damnati.</wd>

<space/>

<wd l="2414" t="4790" r="2832" b="4930">2008.</wd>

<space/>

<wd l="2914" t="4790" r="3888" b="4968">Normalizing</wd>

<space/>

<wd l="3946" t="4834" r="4272" b="4930">sms:</wd>

<space/>

<wd l="4344" t="4790" r="4637" b="4930">Are</wd>

<space/>

<wd l="4675" t="4810" r="4958" b="4930">two</wd>

<space/>

<wd l="5006" t="4790" r="5803" b="4968">metaphors</wd>

<space/>

</ln>

<ln l="1622" t="5026" r="5808" b="5203" baseLine="5155">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4"><wd l="1622" t="5026" r="2069" b="5165">better</wd>

<space/>

<wd l="2136" t="5026" r="2462" b="5165">than</wd>

<space/>

<wd l="2544" t="5026" r="2899" b="5165">one?</wd>

<space/>

<wd l="3101" t="5030" r="3259" b="5160">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4"><wd l="3331" t="5026" r="4286" b="5203">Proceedings</wd>

<space/>

<wd l="4363" t="5026" r="4541" b="5203">of</wd>

<space/>

<wd l="4584" t="5026" r="4814" b="5165">the</wd>

<space/>

<wd l="4886" t="5026" r="5314" b="5165">22Nd</wd>

<space/>

<wd l="5371" t="5030" r="5808" b="5165">Inter-</wd>

</run>

</ln>

<ln l="1622" t="5256" r="5808" b="5434" baseLine="5386" italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="1622" t="5256" r="2275" b="5395">national</wd>

<space/>

<wd l="2357" t="5256" r="3226" b="5434">Conference</wd>

<space/>

<wd l="3317" t="5304" r="3504" b="5395">on</wd>

<space/>

<wd l="3590" t="5256" r="4747" b="5434">Computational</wd>

<space/>

<wd l="4814" t="5261" r="5669" b="5434">Linguistics</wd>

<space/>

<wd l="5760" t="5338" r="5808" b="5357">-</wd>

<space/>

</ln>

<ln l="1637" t="5486" r="5803" b="5664" baseLine="5621">

<run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4"><wd l="1637" t="5486" r="2189" b="5626">Volume</wd>

<space/>

</run>

<wd l="2251" t="5486" r="2376" b="5650"><run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">1</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4"><space/>

<wd l="2438" t="5486" r="3163" b="5626">COLING</wd>

<space/>

<wd l="3226" t="5486" r="3509" b="5650">’08,</wd>

<space/>

<wd l="3566" t="5530" r="3998" b="5664">pages</wd>

<space/>

<wd l="4056" t="5486" r="4771" b="5650">441–448,</wd>

<space/>

<wd l="4834" t="5486" r="5803" b="5664">Stroudsburg,</wd>

<space/>

</run>

</ln>

<ln l="1622" t="5722" r="5765" b="5899" baseLine="5851" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="1622" t="5722" r="1891" b="5885">PA,</wd>

<space/>

<wd l="1949" t="5722" r="2376" b="5861">USA.</wd>

<space/>

<wd l="2434" t="5722" r="3346" b="5861">Association</wd>

<space/>

<wd l="3394" t="5722" r="3624" b="5861">for</wd>

<space/>

<wd l="3672" t="5722" r="4814" b="5899">Computational</wd>

<space/>

<wd l="4867" t="5722" r="5765" b="5899">Linguistics.</wd>

</ln>

</para>

<para l="1416" t="5966" r="5818" b="7306" alignment="justified" li="216" spaceBefore="3" fli="-216" lsp="exactly" lspExact="234" language="en">

<ln l="1416" t="5966" r="5808" b="6144" baseLine="6096" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1416" t="5966" r="1819" b="6106">Chen</wd>

<space/>

<wd l="1915" t="5966" r="2083" b="6101">Li</wd>

<space/>

<wd l="2184" t="5966" r="2458" b="6106">and</wd>

<space/>

<wd l="2549" t="5971" r="2942" b="6144">Yang</wd>

<space/>

<wd l="3038" t="5966" r="3346" b="6106">Liu.</wd>

<space/>

<wd l="3571" t="5966" r="3989" b="6106">2014.</wd>

<space/>

<wd l="4210" t="5966" r="5016" b="6144">Improving</wd>

<space/>

<wd l="5117" t="5986" r="5405" b="6106">text</wd>

<space/>

<wd l="5496" t="6010" r="5808" b="6106">nor-</wd>

</ln>

<ln l="1622" t="6197" r="5813" b="6374" baseLine="6331" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1622" t="6197" r="2438" b="6336">malization</wd>

<space/>

<wd l="2491" t="6197" r="2726" b="6336">via</wd>

<space/>

<wd l="2779" t="6197" r="3797" b="6374">unsupervised</wd>

<space/>

<wd l="3850" t="6197" r="4334" b="6336">model</wd>

<space/>

<wd l="4387" t="6197" r="4666" b="6336">and</wd>

<space/>

<wd l="4718" t="6197" r="5813" b="6336">discriminative</wd>

<space/>

</ln>

<ln l="1622" t="6432" r="5818" b="6610" baseLine="6562">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1622" t="6432" r="2405" b="6610">reranking.</wd>

<space/>

<wd l="2558" t="6437" r="2722" b="6566">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="2794" t="6432" r="3749" b="6610">Proceedings</wd>

<space/>

<wd l="3826" t="6432" r="4008" b="6610">of</wd>

<space/>

<wd l="4051" t="6432" r="4277" b="6571">the</wd>

<space/>

<wd l="4339" t="6437" r="4704" b="6571">ACL</wd>

<space/>

<wd l="4776" t="6432" r="5160" b="6571">2014</wd>

<space/>

<wd l="5232" t="6432" r="5818" b="6571">Student</wd>

<space/>

</run>

</ln>

<ln l="1618" t="6662" r="5808" b="6840" baseLine="6792">

<run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1618" t="6662" r="2333" b="6802">Research</wd>

<space/>

</run>

<wd l="2419" t="6662" r="3211" b="6840"><run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">Workshop</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="3302" t="6706" r="3734" b="6840">pages</wd>

<space/>

<wd l="3826" t="6662" r="4339" b="6826">86–93,</wd>

<space/>

<wd l="4430" t="6662" r="5242" b="6826">Baltimore,</wd>

<space/>

<wd l="5328" t="6667" r="5808" b="6840">Mary-</wd>

</run>

</ln>

<ln l="1622" t="6893" r="5808" b="7070" baseLine="7027" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1622" t="6893" r="1997" b="7056">land,</wd>

<space/>

<wd l="2064" t="6893" r="2491" b="7056">USA,</wd>

<space/>

<wd l="2558" t="6898" r="2952" b="7032">June.</wd>

<space/>

<wd l="3019" t="6893" r="3931" b="7032">Association</wd>

<space/>

<wd l="3989" t="6893" r="4219" b="7032">for</wd>

<space/>

<wd l="4277" t="6893" r="5419" b="7070">Computational</wd>

<space/>

<wd l="5482" t="6893" r="5808" b="7027">Lin-</wd>

</ln>

<ln l="1627" t="7128" r="2256" b="7306" baseLine="7258" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1627" t="7128" r="2256" b="7306">guistics.</wd>

</ln>

</para>

<para l="1411" t="7373" r="5813" b="8712" alignment="justified" li="216" spaceBefore="19" fli="-216" lsp="exactly" lspExact="232" language="en">

<ln l="1411" t="7373" r="5813" b="7550" baseLine="7502" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="7373" r="1819" b="7512">Jiwei</wd>

<space/>

<wd l="1858" t="7373" r="2069" b="7536">Li,</wd>

<space/>

<wd l="2117" t="7373" r="2486" b="7512">Alan</wd>

<space/>

<wd l="2530" t="7373" r="3000" b="7536">Ritter,</wd>

<space/>

<wd l="3058" t="7373" r="3331" b="7512">and</wd>

<space/>

<wd l="3365" t="7373" r="3926" b="7512">Eduard</wd>

<space/>

<wd l="3965" t="7378" r="4142" b="7512">H.</wd>

<space/>

<wd l="4186" t="7378" r="4642" b="7550">Hovy.</wd>

<space/>

<wd l="4728" t="7373" r="5146" b="7512">2014.</wd>

<space/>

<wd l="5227" t="7373" r="5813" b="7550">Weakly</wd>

<space/>

</ln>

<ln l="1632" t="7603" r="5808" b="7781" baseLine="7738">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1632" t="7603" r="2453" b="7781">supervised</wd>

<space/>

<wd l="2496" t="7646" r="2822" b="7742">user</wd>

<space/>

<wd l="2866" t="7603" r="3370" b="7781">profile</wd>

<space/>

<wd l="3418" t="7603" r="4181" b="7742">extraction</wd>

<space/>

<wd l="4229" t="7603" r="4608" b="7742">from</wd>

<space/>

<wd l="4651" t="7603" r="5179" b="7742">twitter.</wd>

<space/>

<wd l="5261" t="7608" r="5424" b="7738">In</wd>

<space/>

</run>

<wd l="5467" t="7608" r="5808" b="7742" italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Pro-</wd>

</ln>

<ln l="1627" t="7838" r="5808" b="8016" baseLine="7968" italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1627" t="7838" r="2299" b="8016">ceedings</wd>

<space/>

<wd l="2381" t="7838" r="2558" b="8016">of</wd>

<space/>

<wd l="2606" t="7838" r="2832" b="7978">the</wd>

<space/>

<wd l="2909" t="7838" r="3307" b="7978">52nd</wd>

<space/>

<wd l="3355" t="7838" r="3931" b="7978">Annual</wd>

<space/>

<wd l="3998" t="7843" r="4637" b="8016">Meeting</wd>

<space/>

<wd l="4714" t="7838" r="4891" b="8016">of</wd>

<space/>

<wd l="4939" t="7838" r="5165" b="7978">the</wd>

<space/>

<wd l="5232" t="7843" r="5808" b="7978">Associ-</wd>

</ln>

<ln l="1622" t="8069" r="5808" b="8246" baseLine="8198" italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1622" t="8074" r="2021" b="8208">ation</wd>

<space/>

<wd l="2054" t="8069" r="2314" b="8246">for</wd>

<space/>

<wd l="2376" t="8069" r="3533" b="8246">Computational</wd>

<space/>

<wd l="3586" t="8074" r="4464" b="8246">Linguistics,</wd>

<space/>

<wd l="4536" t="8074" r="4901" b="8208">ACL</wd>

<space/>

<wd l="4958" t="8069" r="5371" b="8232">2014,</wd>

<space/>

<wd l="5448" t="8074" r="5808" b="8208">June</wd>

<space/>

</ln>

<ln l="1622" t="8299" r="5813" b="8477" baseLine="8434" italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1622" t="8299" r="2102" b="8462">22-27,</wd>

<space/>

<wd l="2203" t="8299" r="2616" b="8462">2014,</wd>

<space/>

<wd l="2712" t="8299" r="3509" b="8462">Baltimore,</wd>

<space/>

<wd l="3605" t="8304" r="3936" b="8462">MD,</wd>

<space/>

<wd l="4046" t="8304" r="4411" b="8462">USA,</wd>

<space/>

<wd l="4517" t="8299" r="5069" b="8438">Volume</wd>

<space/>

<wd l="5155" t="8299" r="5294" b="8438">1:</wd>

<space/>

<wd l="5410" t="8304" r="5813" b="8477">Long</wd>

<space/>

</ln>

<ln l="1622" t="8534" r="3456" b="8712" baseLine="8664">

<wd l="1622" t="8539" r="2194" b="8712"><run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Papers</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="2251" t="8578" r="2683" b="8712">pages</wd>

<space/>

<wd l="2760" t="8534" r="3456" b="8674">165–174.</wd>

</run>

</ln>

</para>

<para l="1416" t="8779" r="5818" b="9883" alignment="justified" li="216" spaceBefore="11" fli="-216" lsp="exactly" lspExact="233" language="en">

<ln l="1416" t="8779" r="5803" b="8942" baseLine="8909" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1416" t="8779" r="1757" b="8918">Sara</wd>

<space/>

<wd l="1848" t="8779" r="2664" b="8942">Rosenthal,</wd>

<space/>

<wd l="2779" t="8779" r="3341" b="8918">Preslav</wd>

<space/>

<wd l="3437" t="8779" r="3974" b="8942">Nakov,</wd>

<space/>

<wd l="4090" t="8779" r="4752" b="8918">Svetlana</wd>

<space/>

<wd l="4848" t="8779" r="5803" b="8942">Kiritchenko,</wd>

<space/>

</ln>

<ln l="1627" t="9010" r="5808" b="9187" baseLine="9144" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1627" t="9010" r="1949" b="9149">Saif</wd>

<space/>

<wd l="2021" t="9014" r="2189" b="9144">M</wd>

<space/>

<wd l="2275" t="9010" r="3250" b="9173">Mohammad,</wd>

<space/>

<wd l="3350" t="9010" r="3720" b="9149">Alan</wd>

<space/>

<wd l="3806" t="9010" r="4282" b="9173">Ritter,</wd>

<space/>

<wd l="4387" t="9010" r="4661" b="9149">and</wd>

<space/>

<wd l="4742" t="9010" r="5314" b="9149">Veselin</wd>

<space/>

<wd l="5400" t="9010" r="5808" b="9187">Stoy-</wd>

</ln>

<ln l="1627" t="9245" r="5808" b="9422" baseLine="9374" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1627" t="9288" r="2026" b="9384">anov.</wd>

<space/>

<wd l="2126" t="9245" r="2549" b="9384">2015.</wd>

<space/>

<wd l="2650" t="9245" r="3744" b="9384">Semeval-2015</wd>

<space/>

<wd l="3806" t="9245" r="4123" b="9384">task</wd>

<space/>

<wd l="4190" t="9245" r="4406" b="9384">10:</wd>

<space/>

<wd l="4498" t="9245" r="5280" b="9384">Sentiment</wd>

<space/>

<wd l="5333" t="9245" r="5808" b="9422">analy-</wd>

</ln>

<ln l="1632" t="9475" r="5818" b="9653" baseLine="9605">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1632" t="9475" r="1819" b="9614">sis</wd>

<space/>

<wd l="1886" t="9475" r="2040" b="9610">in</wd>

<space/>

<wd l="2098" t="9475" r="2630" b="9614">twitter.</wd>

<space/>

<wd l="2746" t="9480" r="2904" b="9610">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="2966" t="9475" r="3922" b="9653">Proceedings</wd>

<space/>

<wd l="3989" t="9475" r="4166" b="9653">of</wd>

<space/>

<wd l="4195" t="9475" r="4426" b="9614">the</wd>

<space/>

<wd l="4488" t="9475" r="4733" b="9614">9th</wd>

<space/>

<wd l="4790" t="9475" r="5818" b="9614">International</wd>

<space/>

</run>

</ln>

<ln l="1632" t="9706" r="5088" b="9883" baseLine="9840" italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1632" t="9706" r="2386" b="9883">Workshop</wd>

<space/>

<wd l="2443" t="9754" r="2630" b="9845">on</wd>

<space/>

<wd l="2678" t="9710" r="3389" b="9845">Semantic</wd>

<space/>

<wd l="3432" t="9706" r="4315" b="9869">Evaluation,</wd>

<space/>

<wd l="4382" t="9706" r="5088" b="9845">SemEval.</wd>

</ln>

</para>

<para l="1411" t="9950" r="5818" b="10824" alignment="justified" li="216" spaceBefore="5" fli="-216" lsp="exactly" lspExact="234" language="en">

<ln l="1411" t="9950" r="5818" b="10128" baseLine="10085" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1411" t="9950" r="2026" b="10090">Richard</wd>

<space/>

<wd l="2102" t="9950" r="2640" b="10128">Sproat,</wd>

<space/>

<wd l="2726" t="9950" r="3096" b="10090">Alan</wd>

<space/>

<wd l="3173" t="9955" r="3355" b="10090">W</wd>

<space/>

<wd l="3422" t="9950" r="3912" b="10114">Black,</wd>

<space/>

<wd l="4003" t="9950" r="4574" b="10128">Stanley</wd>

<space/>

<wd l="4651" t="9950" r="5093" b="10114">Chen,</wd>

<space/>

<wd l="5184" t="9950" r="5818" b="10090">Shankar</wd>

<space/>

</ln>

<ln l="1627" t="10186" r="5803" b="10363" baseLine="10315" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1627" t="10190" r="2189" b="10349">Kumar,</wd>

<space/>

<wd l="2304" t="10186" r="2678" b="10325">Mari</wd>

<space/>

<wd l="2779" t="10186" r="3586" b="10349">Ostendorf,</wd>

<space/>

<wd l="3706" t="10186" r="3979" b="10325">and</wd>

<space/>

<wd l="4075" t="10186" r="4987" b="10363">Christopher</wd>

<space/>

<wd l="5078" t="10186" r="5803" b="10325">Richards.</wd>

<space/>

</ln>

<ln l="1627" t="10416" r="5808" b="10555" baseLine="10550">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1627" t="10416" r="2050" b="10555">2001.</wd>

<space/>

<wd l="2208" t="10416" r="3326" b="10555">Normalization</wd>

<space/>

<wd l="3403" t="10416" r="3571" b="10555">of</wd>

<space/>

<wd l="3634" t="10416" r="4642" b="10555">non-standard</wd>

<space/>

<wd l="4714" t="10416" r="5222" b="10555">words.</wd>

<space/>

</run>

<wd l="5395" t="10421" r="5808" b="10555" italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">Com-</wd>

</ln>

<ln l="1608" t="10646" r="4925" b="10824" baseLine="10781">

<run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1608" t="10675" r="2040" b="10824">puter</wd>

<space/>

<wd l="2083" t="10646" r="2626" b="10824">Speech</wd>

<space/>

<wd l="2688" t="10651" r="2818" b="10786">&amp;</wd>

<space/>

</run>

<wd l="2866" t="10651" r="3682" b="10824"><run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">Language</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="3758" t="10646" r="4925" b="10819">15(3):287–333.</wd>

</run>

</ln>

</para>

<para l="1411" t="10896" r="5813" b="11534" alignment="justified" li="216" spaceBefore="3" fli="-216" lsp="exactly" lspExact="234" language="en">

<ln l="1411" t="10896" r="5813" b="11074" baseLine="11026" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1411" t="10896" r="1594" b="11030">Yi</wd>

<space/>

<wd l="1646" t="10901" r="2040" b="11074">Yang</wd>

<space/>

<wd l="2098" t="10896" r="2371" b="11035">and</wd>

<space/>

<wd l="2424" t="10896" r="2856" b="11035">Jacob</wd>

<space/>

<wd l="2914" t="10896" r="3744" b="11035">Eisenstein.</wd>

<space/>

<wd l="3835" t="10896" r="4258" b="11035">2013.</wd>

<space/>

<wd l="4339" t="10896" r="4478" b="11030">A</wd>

<space/>

<wd l="4526" t="10896" r="5285" b="11074">log-linear</wd>

<space/>

<wd l="5328" t="10896" r="5813" b="11035">model</wd>

<space/>

</ln>

<ln l="1622" t="11126" r="5803" b="11304" baseLine="11256">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1622" t="11126" r="1853" b="11266">for</wd>

<space/>

<wd l="1958" t="11126" r="2976" b="11304">unsupervised</wd>

<space/>

<wd l="3086" t="11146" r="3374" b="11266">text</wd>

<space/>

<wd l="3480" t="11126" r="4594" b="11266">normalization.</wd>

<space/>

<wd l="4862" t="11131" r="5021" b="11261">In</wd>

<space/>

</run>

<wd l="5126" t="11131" r="5803" b="11290"><run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">EMNLP</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

</ln>

<ln l="1622" t="11357" r="2635" b="11534" baseLine="11491" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1622" t="11400" r="2059" b="11534">pages</wd>

<space/>

<wd l="2117" t="11357" r="2635" b="11496">61–72.</wd>

</ln>

</para>

<para l="1416" t="11606" r="5818" b="12710" alignment="justified" li="216" spaceBefore="11" spaceAfter="3253" fli="-216" lsp="exactly" lspExact="233" language="en">

<ln l="1416" t="11606" r="5813" b="11784" baseLine="11736" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="11606" r="1973" b="11784">Congle</wd>

<space/>

<wd l="2093" t="11606" r="2626" b="11784">Zhang,</wd>

<space/>

<wd l="2779" t="11606" r="3182" b="11784">Tyler</wd>

<space/>

<wd l="3302" t="11606" r="3998" b="11770">Baldwin,</wd>

<space/>

<wd l="4147" t="11606" r="4762" b="11746">Howard</wd>

<space/>

<wd l="4886" t="11611" r="5160" b="11770">Ho,</wd>

<space/>

<wd l="5314" t="11611" r="5813" b="11784">Benny</wd>

<space/>

</ln>

<ln l="1627" t="11837" r="5808" b="12014" baseLine="11966" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1627" t="11837" r="2448" b="12000">Kimelfeld,</wd>

<space/>

<wd l="2539" t="11837" r="2813" b="11976">and</wd>

<space/>

<wd l="2890" t="11842" r="3470" b="12014">Yunyao</wd>

<space/>

<wd l="3552" t="11837" r="3758" b="11976">Li.</wd>

<space/>

<wd l="3926" t="11837" r="4344" b="11976">2013.</wd>

<space/>

<wd l="4507" t="11837" r="5208" b="12014">Adaptive</wd>

<space/>

<wd l="5280" t="11880" r="5808" b="12014">parser-</wd>

</ln>

<ln l="1627" t="12067" r="5813" b="12245" baseLine="12202">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1627" t="12067" r="2146" b="12206">centric</wd>

<space/>

<wd l="2203" t="12086" r="2496" b="12206">text</wd>

<space/>

<wd l="2544" t="12067" r="3658" b="12206">normalization.</wd>

<space/>

<wd l="3754" t="12072" r="3912" b="12202">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3965" t="12067" r="4920" b="12245">Proceedings</wd>

<space/>

<wd l="4982" t="12067" r="5160" b="12245">of</wd>

<space/>

<wd l="5184" t="12067" r="5414" b="12206">the</wd>

<space/>

<wd l="5467" t="12067" r="5813" b="12206">49th</wd>

<space/>

</run>

</ln>

<ln l="1608" t="12302" r="5818" b="12480" baseLine="12432" italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1608" t="12302" r="2189" b="12442">Annual</wd>

<space/>

<wd l="2232" t="12307" r="2870" b="12480">Meeting</wd>

<space/>

<wd l="2928" t="12302" r="3106" b="12480">of</wd>

<space/>

<wd l="3134" t="12302" r="3360" b="12442">the</wd>

<space/>

<wd l="3403" t="12307" r="4320" b="12442">Association</wd>

<space/>

<wd l="4344" t="12302" r="4608" b="12480">for</wd>

<space/>

<wd l="4661" t="12302" r="5818" b="12480">Computational</wd>

<space/>

</ln>

<ln l="1618" t="12533" r="4238" b="12710" baseLine="12662">

<run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1618" t="12538" r="2472" b="12710">Linguistics</wd>

<space/>

</run>

<wd l="2525" t="12533" r="2784" b="12706"><run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(1)</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="2842" t="12576" r="3274" b="12710">pages</wd>

<space/>

<wd l="3350" t="12533" r="4238" b="12672">1159–1168.</wd>

</run>

</ln>

</para>

</column>

</section>

<dd l="5831" t="15736" r="6181" b="15980">

<para l="5831" t="15787" r="6148" b="15946" alignment="left" spaceBefore="4" lsp="exactly" lspExact="229" language="en">

<ln l="5875" t="15787" r="6082" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="6">

<wd l="5875" t="15787" r="6082" b="15946">86</wd>

</ln>

</para>

</dd>

</body>

</page>

</document>

