<?xml version="1.0" encoding="UTF-16"?>

<!--XML document generated using OCR technology from Nuance Communications, Inc.-->

<document xmlns="http://www.scansoft.com/omnipage/xml/ssdoc-schema3.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4305.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1416" marginTop="1420" marginRight="1392" marginBottom="358" offsetX="-8" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1416" t="1420" r="10517" b="2575">

<column l="1416" t="1420" r="10517" b="2575">

<para l="2928" t="1488" r="8971" b="1762" alignment="centered" spaceBefore="10" spaceAfter="788" lsp="exactly" lspExact="345" language="en">

<ln l="2928" t="1488" r="8971" b="1762" baseLine="1690" bold="true" underlined="none" subsuperscript="none" fontSize="1500" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="2928" t="1488" r="3144" b="1694">A</wd>

<space/>

<wd l="3221" t="1488" r="4680" b="1762">Normalizer</wd>

<space/>

<wd l="4766" t="1488" r="5141" b="1699">for</wd>

<space/>

<wd l="5222" t="1488" r="5866" b="1699">UGC</wd>

<space/>

<wd l="5962" t="1488" r="6197" b="1694">in</wd>

<space/>

<wd l="6288" t="1488" r="7459" b="1699">Brazilian</wd>

<space/>

<wd l="7546" t="1493" r="8971" b="1762">Portuguese</wd>

</ln>

</para>

</column>

</section>

<section l="1526" t="2575" r="10272" b="4152">

<column l="1526" t="2575" r="4555" b="4152">

<para l="1560" t="2626" r="4517" b="3840" alignment="centered" spaceBefore="20" lsp="exactly" lspExact="253" language="en">

<ln l="1886" t="2626" r="4210" b="2842" baseLine="2789" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="1886" t="2626" r="2597" b="2842">Magali</wd>

<space/>

<wd l="2669" t="2626" r="3480" b="2794">Sanches</wd>

<space/>

<wd l="3547" t="2626" r="4210" b="2794">Duran
</wd>

</ln>

<ln l="1560" t="2880" r="4517" b="3091" baseLine="3043" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="1560" t="2890" r="2074" b="3048">NILC</wd>

<space/>

<wd l="2136" t="2986" r="2198" b="3005">-</wd>

<space/>

<wd l="2261" t="2890" r="2851" b="3048">Center</wd>

<space/>

<wd l="2909" t="2890" r="3163" b="3048">for</wd>

<space/>

<wd l="3221" t="2890" r="4517" b="3091">Computational
</wd>

</ln>

<ln l="2554" t="3144" r="3528" b="3346" baseLine="3293" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="2554" t="3144" r="3528" b="3346">Linguistics
</wd>

</ln>

<ln l="1819" t="3398" r="4277" b="3600" baseLine="3547" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="1819" t="3398" r="2136" b="3557">São</wd>

<space/>

<wd l="2194" t="3398" r="2688" b="3557">Paulo</wd>

<space/>

<wd l="2750" t="3398" r="3677" b="3600">University</wd>

<space/>

<wd l="3734" t="3398" r="4277" b="3600">(USP)
</wd>

</ln>

<ln l="2093" t="3648" r="4003" b="3840" baseLine="3802" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="2093" t="3648" r="2410" b="3806">São</wd>

<space/>

<wd l="2472" t="3648" r="3408" b="3840">Carlos-SP,</wd>

<space/>

<wd l="3470" t="3648" r="4003" b="3806">Brazil</wd>

</ln>

</para>

<para l="1526" t="3955" r="4550" b="4147" alignment="left" spaceBefore="71" lsp="exactly" lspExact="216" language="en">

<ln l="1526" t="3955" r="4550" b="4147" baseLine="4104" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Courier New" fontFamily="modern" fontPitch="fixed" spacing="-5">

<wd l="1526" t="3960" r="4550" b="4147">magali.duran@uol.com.br</wd>

</ln>

</para>

</column>

<column l="4901" t="2575" r="7363" b="4152">

<para l="4901" t="2626" r="7358" b="3840" alignment="centered" spaceBefore="20" lsp="exactly" lspExact="253" language="en">

<ln l="5405" t="2626" r="6845" b="2837" baseLine="2789" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2" forcedEOF="true">

<wd l="5405" t="2626" r="6010" b="2794">Lucas</wd>

<space/>

<wd l="6077" t="2626" r="6845" b="2837">Avanço
</wd>

</ln>

<ln l="5323" t="2890" r="6926" b="3048" baseLine="3043" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2" forcedEOF="true">

<wd l="5323" t="2890" r="5837" b="3048">NILC</wd>

<space/>

<wd l="5904" t="2986" r="5966" b="3005">-</wd>

<space/>

<wd l="6029" t="2890" r="6614" b="3048">Center</wd>

<space/>

<wd l="6672" t="2890" r="6926" b="3048">for
</wd>

</ln>

<ln l="4963" t="3144" r="7291" b="3346" baseLine="3293" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2" forcedEOF="true">

<wd l="4963" t="3144" r="6259" b="3346">Computational</wd>

<space/>

<wd l="6322" t="3144" r="7291" b="3346">Linguistics
</wd>

</ln>

<ln l="4901" t="3398" r="7358" b="3600" baseLine="3547" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2" forcedEOF="true">

<wd l="4901" t="3398" r="5218" b="3557">São</wd>

<space/>

<wd l="5275" t="3398" r="5774" b="3557">Paulo</wd>

<space/>

<wd l="5832" t="3398" r="6763" b="3600">University</wd>

<space/>

<wd l="6821" t="3398" r="7358" b="3600">(USP)
</wd>

</ln>

<ln l="5174" t="3648" r="7090" b="3840" baseLine="3802" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2" forcedEOF="true">

<wd l="5174" t="3648" r="5491" b="3806">São</wd>

<space/>

<wd l="5558" t="3648" r="6490" b="3840">Carlos-SP,</wd>

<space/>

<wd l="6552" t="3648" r="7090" b="3806">Brazil</wd>

</ln>

</para>

<para l="4954" t="3950" r="7320" b="4147" alignment="left" spaceBefore="71" lsp="exactly" lspExact="216" language="en">

<ln l="4954" t="3950" r="7320" b="4147" baseLine="4104" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Courier New" fontFamily="modern" fontPitch="fixed" spacing="-2">

<wd l="4954" t="3960" r="7320" b="4147">avanco89@gmail.com</wd>

</ln>

</para>

</column>

<column l="7805" t="2575" r="10272" b="4152">

<para l="7805" t="2626" r="10267" b="3840" alignment="centered" spaceBefore="20" lsp="exactly" lspExact="253" language="en">

<ln l="7819" t="2626" r="10243" b="2842" baseLine="2789" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2" forcedEOF="true">

<wd l="7819" t="2626" r="8093" b="2794">M.</wd>

<space/>

<wd l="8170" t="2626" r="8885" b="2837">Graças</wd>

<space/>

<wd l="8952" t="2626" r="9547" b="2842">Volpe</wd>

<space/>

<wd l="9614" t="2626" r="10243" b="2842">Nunes
</wd>

</ln>

<ln l="8261" t="2890" r="9806" b="3048" baseLine="3043" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2" forcedEOF="true">

<wd l="8261" t="2890" r="8846" b="3048">NILC-</wd>

<space/>

<wd l="8909" t="2890" r="9494" b="3048">Center</wd>

<space/>

<wd l="9552" t="2890" r="9806" b="3048">for
</wd>

</ln>

<ln l="7867" t="3144" r="10200" b="3346" baseLine="3293" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2" forcedEOF="true">

<wd l="7867" t="3144" r="9168" b="3346">Computational</wd>

<space/>

<wd l="9230" t="3144" r="10200" b="3346">Linguistics
</wd>

</ln>

<ln l="7805" t="3398" r="10267" b="3600" baseLine="3547" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2" forcedEOF="true">

<wd l="7805" t="3398" r="8122" b="3557">São</wd>

<space/>

<wd l="8184" t="3398" r="8678" b="3557">Paulo</wd>

<space/>

<wd l="8741" t="3398" r="9667" b="3600">University</wd>

<space/>

<wd l="9725" t="3398" r="10267" b="3600">(USP)
</wd>

</ln>

<ln l="8083" t="3648" r="9994" b="3840" baseLine="3802" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2" forcedEOF="true">

<wd l="8083" t="3648" r="8400" b="3806">São</wd>

<space/>

<wd l="8462" t="3648" r="9398" b="3840">Carlos-SP,</wd>

<space/>

<wd l="9461" t="3648" r="9994" b="3806">Brazil</wd>

</ln>

</para>

<para l="7858" t="3950" r="10210" b="4147" alignment="left" spaceBefore="71" lsp="exactly" lspExact="216" language="en">

<ln l="7858" t="3950" r="10210" b="4147" baseLine="4104" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Courier New" fontFamily="modern" fontPitch="fixed" spacing="-1">

<wd l="7858" t="3960" r="10210" b="4147">gracan@icmc.usp.br</wd>

</ln>

</para>

</column>

</section>

<section l="1416" t="5070" r="10517" b="15356">

<column l="1416" t="5070" r="5813" b="15356">

<para l="3154" t="5366" r="4046" b="5534" alignment="centered" spaceBefore="244" lsp="exactly" lspExact="271" language="en">

<ln l="3154" t="5366" r="4046" b="5534" baseLine="5525" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="3154" t="5366" r="4046" b="5534">Abstract</wd>

</ln>

</para>

<para l="1416" t="5870" r="5789" b="11846" alignment="justified" spaceBefore="238" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="1646" t="5870" r="5779" b="6072" baseLine="6019" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1646" t="5870" r="2995" b="6072">User-generated</wd>

<space/>

<wd l="3106" t="5890" r="3830" b="6029">contents</wd>

<space/>

<wd l="3946" t="5870" r="4546" b="6072">(UGC)</wd>

<space/>

<wd l="4661" t="5890" r="5467" b="6072">represent</wd>

<space/>

<wd l="5582" t="5918" r="5779" b="6029">an</wd>

<space/>

</ln>

<ln l="1426" t="6120" r="5774" b="6322" baseLine="6274" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="6120" r="2275" b="6322">important</wd>

<space/>

<wd l="2338" t="6168" r="2899" b="6278">source</wd>

<space/>

<wd l="2962" t="6120" r="3158" b="6278">of</wd>

<space/>

<wd l="3197" t="6120" r="4229" b="6278">information</wd>

<space/>

<wd l="4286" t="6120" r="4541" b="6278">for</wd>

<space/>

<wd l="4594" t="6139" r="5774" b="6322">governments,</wd>

<space/>

</ln>

<ln l="1426" t="6374" r="5770" b="6576" baseLine="6528" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="6374" r="2410" b="6576">companies,</wd>

<space/>

<wd l="2510" t="6374" r="3235" b="6576">political</wd>

<space/>

<wd l="3346" t="6374" r="4262" b="6533">candidates</wd>

<space/>

<wd l="4373" t="6374" r="4694" b="6533">and</wd>

<space/>

<wd l="4795" t="6422" r="5770" b="6533">consumers.</wd>

<space/>

</ln>

<ln l="1421" t="6629" r="5779" b="6830" baseLine="6778" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="6634" r="2270" b="6821">However,</wd>

<space/>

<wd l="2501" t="6648" r="2928" b="6787">most</wd>

<space/>

<wd l="3149" t="6629" r="3350" b="6787">of</wd>

<space/>

<wd l="3547" t="6629" r="3816" b="6787">the</wd>

<space/>

<wd l="4032" t="6629" r="4694" b="6787">Natural</wd>

<space/>

<wd l="4915" t="6634" r="5779" b="6830">Language</wd>

<space/>

</ln>

<ln l="1421" t="6878" r="5789" b="7080" baseLine="7032" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="6878" r="2371" b="7080">Processing</wd>

<space/>

<wd l="2482" t="6878" r="2909" b="7037">tools</wd>

<space/>

<wd l="3024" t="6878" r="3346" b="7037">and</wd>

<space/>

<wd l="3451" t="6878" r="4387" b="7080">techniques</wd>

<space/>

<wd l="4507" t="6926" r="4766" b="7037">are</wd>

<space/>

<wd l="4886" t="6878" r="5789" b="7080">developed</wd>

<space/>

</ln>

<ln l="1426" t="7133" r="5784" b="7334" baseLine="7286" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="7133" r="1848" b="7291">from</wd>

<space/>

<wd l="1896" t="7133" r="2218" b="7291">and</wd>

<space/>

<wd l="2266" t="7133" r="2520" b="7291">for</wd>

<space/>

<wd l="2568" t="7152" r="2976" b="7291">texts</wd>

<space/>

<wd l="3034" t="7133" r="3235" b="7291">of</wd>

<space/>

<wd l="3274" t="7133" r="4013" b="7291">standard</wd>

<space/>

<wd l="4061" t="7133" r="4901" b="7334">language,</wd>

<space/>

<wd l="4958" t="7133" r="5280" b="7291">and</wd>

<space/>

<wd l="5323" t="7133" r="5784" b="7291">UGC</wd>

<space/>

</ln>

<ln l="1426" t="7387" r="5789" b="7589" baseLine="7536" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="7387" r="1560" b="7546">is</wd>

<space/>

<wd l="1651" t="7435" r="1747" b="7546">a</wd>

<space/>

<wd l="1824" t="7406" r="2198" b="7589">type</wd>

<space/>

<wd l="2285" t="7387" r="2486" b="7546">of</wd>

<space/>

<wd l="2539" t="7406" r="2870" b="7546">text</wd>

<space/>

<wd l="2957" t="7387" r="3830" b="7589">especially</wd>

<space/>

<wd l="3912" t="7387" r="4210" b="7546">full</wd>

<space/>

<wd l="4301" t="7387" r="4502" b="7546">of</wd>

<space/>

<wd l="4560" t="7387" r="5386" b="7589">creativity</wd>

<space/>

<wd l="5467" t="7387" r="5789" b="7546">and</wd>

<space/>

</ln>

<ln l="1426" t="7642" r="5779" b="7843" baseLine="7790" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="7642" r="2712" b="7843">idiosyncrasies,</wd>

<space/>

<wd l="2822" t="7642" r="3360" b="7800">which</wd>

<space/>

<wd l="3456" t="7661" r="4344" b="7843">represents</wd>

<space/>

<wd l="4450" t="7642" r="4910" b="7800">noise</wd>

<space/>

<wd l="5016" t="7642" r="5270" b="7800">for</wd>

<space/>

<wd l="5366" t="7646" r="5779" b="7800">NLP</wd>

<space/>

</ln>

<ln l="1416" t="7891" r="5784" b="8093" baseLine="8045" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="7939" r="2246" b="8093">purposes.</wd>

<space/>

<wd l="2386" t="7891" r="2770" b="8050">This</wd>

<space/>

<wd l="2899" t="7939" r="3394" b="8093">paper</wd>

<space/>

<wd l="3514" t="7910" r="4234" b="8093">presents</wd>

<space/>

<wd l="4368" t="7891" r="5549" b="8083">UGCNormal,</wd>

<space/>

<wd l="5688" t="7939" r="5784" b="8050">a</wd>

<space/>

</ln>

<ln l="1426" t="8146" r="5779" b="8304" baseLine="8294" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="8146" r="2645" b="8304">lexicon-based</wd>

<space/>

<wd l="2774" t="8146" r="3110" b="8304">tool</wd>

<space/>

<wd l="3254" t="8146" r="3509" b="8304">for</wd>

<space/>

<wd l="3643" t="8146" r="4099" b="8304">UGC</wd>

<space/>

<wd l="4238" t="8146" r="5501" b="8304">normalization.</wd>

<space/>

<wd l="5654" t="8150" r="5779" b="8304">It</wd>

<space/>

</ln>

<ln l="1426" t="8400" r="5779" b="8602" baseLine="8549" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="8448" r="2563" b="8602">encompasses</wd>

<space/>

<wd l="2616" t="8448" r="2712" b="8558">a</wd>

<space/>

<wd l="2750" t="8400" r="3619" b="8592">tokenizer,</wd>

<space/>

<wd l="3672" t="8448" r="3768" b="8558">a</wd>

<space/>

<wd l="3816" t="8419" r="4565" b="8558">sentence</wd>

<space/>

<wd l="4618" t="8400" r="5779" b="8602">segmentation</wd>

<space/>

</ln>

<ln l="1421" t="8650" r="5779" b="8851" baseLine="8803" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="8650" r="1814" b="8842">tool,</wd>

<space/>

<wd l="1886" t="8698" r="1982" b="8808">a</wd>

<space/>

<wd l="2035" t="8650" r="3379" b="8851">phonetic-based</wd>

<space/>

<wd l="3446" t="8650" r="4027" b="8851">speller</wd>

<space/>

<wd l="4090" t="8650" r="4411" b="8808">and</wd>

<space/>

<wd l="4478" t="8698" r="4930" b="8808">some</wd>

<space/>

<wd l="5002" t="8650" r="5779" b="8842">lexicons,</wd>

<space/>

</ln>

<ln l="1421" t="8904" r="5784" b="9106" baseLine="9058" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="8904" r="1958" b="9062">which</wd>

<space/>

<wd l="2030" t="8952" r="2453" b="9062">were</wd>

<space/>

<wd l="2534" t="8904" r="3427" b="9106">originated</wd>

<space/>

<wd l="3499" t="8904" r="3922" b="9062">from</wd>

<space/>

<wd l="3994" t="8952" r="4090" b="9062">a</wd>

<space/>

<wd l="4162" t="8904" r="4570" b="9106">deep</wd>

<space/>

<wd l="4651" t="8904" r="5352" b="9106">analysis</wd>

<space/>

<wd l="5434" t="8904" r="5635" b="9062">of</wd>

<space/>

<wd l="5688" t="8952" r="5784" b="9062">a</wd>

<space/>

</ln>

<ln l="1426" t="9158" r="5774" b="9360" baseLine="9307" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="9206" r="2002" b="9360">corpus</wd>

<space/>

<wd l="2261" t="9158" r="2462" b="9317">of</wd>

<space/>

<wd l="2688" t="9158" r="3365" b="9360">product</wd>

<space/>

<wd l="3619" t="9158" r="4296" b="9317">reviews</wd>

<space/>

<wd l="4555" t="9158" r="4723" b="9312">in</wd>

<space/>

<wd l="4978" t="9158" r="5774" b="9317">Brazilian</wd>

<space/>

</ln>

<ln l="1421" t="9413" r="5779" b="9614" baseLine="9562" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="9418" r="2438" b="9614">Portuguese.</wd>

<space/>

<wd l="2515" t="9413" r="2851" b="9571">The</wd>

<space/>

<wd l="2909" t="9413" r="3869" b="9571">normalizer</wd>

<space/>

<wd l="3922" t="9461" r="4258" b="9571">was</wd>

<space/>

<wd l="4325" t="9413" r="5170" b="9571">evaluated</wd>

<space/>

<wd l="5227" t="9413" r="5395" b="9566">in</wd>

<space/>

<wd l="5453" t="9432" r="5779" b="9571">two</wd>

<space/>

</ln>

<ln l="1426" t="9662" r="5779" b="9826" baseLine="9816" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="9662" r="2179" b="9821">different</wd>

<space/>

<wd l="2270" t="9662" r="2635" b="9821">data</wd>

<space/>

<wd l="2726" t="9682" r="3043" b="9821">sets</wd>

<space/>

<wd l="3139" t="9662" r="3456" b="9821">and</wd>

<space/>

<wd l="3547" t="9662" r="4157" b="9821">carried</wd>

<space/>

<wd l="4243" t="9682" r="4522" b="9821">out</wd>

<space/>

<wd l="4613" t="9662" r="5040" b="9821">from</wd>

<space/>

<wd l="5126" t="9662" r="5520" b="9826">31%</wd>

<space/>

<wd l="5611" t="9682" r="5779" b="9821">to</wd>

<space/>

</ln>

<ln l="1430" t="9917" r="5789" b="10118" baseLine="10066" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="9917" r="1819" b="10080">89%</wd>

<space/>

<wd l="1886" t="9917" r="2088" b="10075">of</wd>

<space/>

<wd l="2117" t="9917" r="2386" b="10075">the</wd>

<space/>

<wd l="2448" t="9917" r="3442" b="10118">appropriate</wd>

<space/>

<wd l="3509" t="9917" r="4531" b="10109">corrections,</wd>

<space/>

<wd l="4598" t="9917" r="5510" b="10118">depending</wd>

<space/>

<wd l="5573" t="9965" r="5789" b="10075">on</wd>

<space/>

</ln>

<ln l="1421" t="10171" r="5779" b="10373" baseLine="10320" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="10171" r="1685" b="10330">the</wd>

<space/>

<wd l="1728" t="10190" r="2107" b="10373">type</wd>

<space/>

<wd l="2155" t="10171" r="2357" b="10330">of</wd>

<space/>

<wd l="2381" t="10190" r="2712" b="10330">text</wd>

<space/>

<wd l="2750" t="10171" r="3259" b="10330">noise.</wd>

<space/>

<wd l="3317" t="10171" r="3653" b="10330">The</wd>

<space/>

<wd l="3701" t="10219" r="3989" b="10330">use</wd>

<space/>

<wd l="4042" t="10171" r="4243" b="10330">of</wd>

<space/>

<wd l="4262" t="10171" r="5395" b="10330">UGCNormal</wd>

<space/>

<wd l="5443" t="10219" r="5779" b="10330">was</wd>

<space/>

</ln>

<ln l="1426" t="10421" r="5784" b="10622" baseLine="10574" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="10421" r="1771" b="10579">also</wd>

<space/>

<wd l="1867" t="10421" r="2678" b="10579">validated</wd>

<space/>

<wd l="2770" t="10421" r="2938" b="10574">in</wd>

<space/>

<wd l="3034" t="10469" r="3130" b="10579">a</wd>

<space/>

<wd l="3221" t="10421" r="3581" b="10579">task</wd>

<space/>

<wd l="3672" t="10421" r="3874" b="10579">of</wd>

<space/>

<wd l="3941" t="10421" r="4339" b="10579">POS</wd>

<space/>

<wd l="4440" t="10421" r="5146" b="10622">tagging,</wd>

<space/>

<wd l="5246" t="10421" r="5784" b="10579">which</wd>

<space/>

</ln>

<ln l="1426" t="10675" r="5789" b="10877" baseLine="10829" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="10675" r="2266" b="10877">improved</wd>

<space/>

<wd l="2309" t="10675" r="2736" b="10834">from</wd>

<space/>

<wd l="2779" t="10675" r="3446" b="10838">91.35%</wd>

<space/>

<wd l="3494" t="10694" r="3662" b="10834">to</wd>

<space/>

<wd l="3715" t="10675" r="4382" b="10838">93.15%</wd>

<space/>

<wd l="4435" t="10675" r="4603" b="10829">in</wd>

<space/>

<wd l="4651" t="10723" r="5429" b="10877">accuracy</wd>

<space/>

<wd l="5472" t="10675" r="5789" b="10834">and</wd>

<space/>

</ln>

<ln l="1426" t="10930" r="5789" b="11131" baseLine="11078" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="10930" r="1589" b="11083">in</wd>

<space/>

<wd l="1637" t="10978" r="1733" b="11088">a</wd>

<space/>

<wd l="1771" t="10930" r="2131" b="11088">task</wd>

<space/>

<wd l="2170" t="10930" r="2371" b="11088">of</wd>

<space/>

<wd l="2395" t="10930" r="3062" b="11131">opinion</wd>

<space/>

<wd l="3110" t="10930" r="4315" b="11122">classification,</wd>

<space/>

<wd l="4363" t="10930" r="4901" b="11088">which</wd>

<space/>

<wd l="4944" t="10930" r="5789" b="11131">improved</wd>

<space/>

</ln>

<ln l="1421" t="11179" r="5784" b="11381" baseLine="11333" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="11179" r="1685" b="11338">the</wd>

<space/>

<wd l="1853" t="11227" r="2530" b="11381">average</wd>

<space/>

<wd l="2698" t="11179" r="2894" b="11338">of</wd>

<space/>

<wd l="3034" t="11179" r="3802" b="11338">F1-score</wd>

<space/>

<wd l="3965" t="11227" r="4781" b="11338">measures</wd>

<space/>

<wd l="4949" t="11179" r="5784" b="11381">(F1-score</wd>

<space/>

</ln>

<ln l="1416" t="11434" r="5779" b="11635" baseLine="11587" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="11434" r="2112" b="11635">positive</wd>

<space/>

<wd l="2232" t="11434" r="2549" b="11592">and</wd>

<space/>

<wd l="2659" t="11434" r="3422" b="11592">F1-score</wd>

<space/>

<wd l="3538" t="11434" r="4349" b="11635">negative)</wd>

<space/>

<wd l="4474" t="11434" r="4896" b="11592">from</wd>

<space/>

<wd l="5011" t="11434" r="5496" b="11592">0.736</wd>

<space/>

<wd l="5611" t="11453" r="5779" b="11592">to</wd>

<space/>

</ln>

<ln l="1426" t="11688" r="1958" b="11846" baseLine="11837" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="11688" r="1958" b="11846">0.758.</wd>

</ln>

</para>

<para l="1435" t="12446" r="3082" b="12614" alignment="left" spaceBefore="499" lsp="exactly" lspExact="271" language="en">

<ln l="1435" t="12446" r="3082" b="12614" baseLine="12605" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="6">

<wd l="1435" t="12446" r="1594" b="12614">1.</wd>

<space/>

<wd l="1786" t="12446" r="3082" b="12614">Introduction</wd>

</ln>

</para>

<para l="1421" t="12869" r="5808" b="15346" alignment="justified" spaceBefore="161" lsp="exactly" lspExact="252" language="en">

<ln l="1426" t="12869" r="5789" b="13070" baseLine="13022" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="12869" r="1762" b="13027">The</wd>

<space/>

<wd l="1824" t="12869" r="2726" b="13070">increasing</wd>

<space/>

<wd l="2784" t="12869" r="3437" b="13027">volume</wd>

<space/>

<wd l="3504" t="12869" r="3706" b="13027">of</wd>

<space/>

<wd l="3744" t="12888" r="4080" b="13027">text</wd>

<space/>

<wd l="4138" t="12869" r="4718" b="13070">posted</wd>

<space/>

<wd l="4771" t="12869" r="4997" b="13070">by</wd>

<space/>

<wd l="5054" t="12917" r="5501" b="13027">users</wd>

<space/>

<wd l="5573" t="12917" r="5789" b="13027">on</wd>

<space/>

</ln>

<ln l="1421" t="13123" r="5779" b="13325" baseLine="13272" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="13123" r="1685" b="13282">the</wd>

<space/>

<wd l="1834" t="13123" r="2194" b="13282">web</wd>

<space/>

<wd l="2347" t="13123" r="2486" b="13282">is</wd>

<space/>

<wd l="2630" t="13123" r="3408" b="13325">regarded</wd>

<space/>

<wd l="3552" t="13171" r="3725" b="13282">as</wd>

<space/>

<wd l="3874" t="13171" r="4080" b="13282">an</wd>

<space/>

<wd l="4229" t="13123" r="5102" b="13325">extremely</wd>

<space/>

<wd l="5246" t="13123" r="5779" b="13282">useful</wd>

<space/>

</ln>

<ln l="1426" t="13373" r="5784" b="13574" baseLine="13526" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="13373" r="2448" b="13574">opportunity</wd>

<space/>

<wd l="2563" t="13392" r="2731" b="13531">to</wd>

<space/>

<wd l="2851" t="13373" r="3384" b="13531">reveal</wd>

<space/>

<wd l="3499" t="13373" r="4046" b="13574">public</wd>

<space/>

<wd l="4176" t="13373" r="4843" b="13574">opinion</wd>

<space/>

<wd l="4963" t="13421" r="5179" b="13531">on</wd>

<space/>

<wd l="5299" t="13421" r="5784" b="13574">many</wd>

<space/>

</ln>

<ln l="1426" t="13627" r="5774" b="13829" baseLine="13781" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="13627" r="1992" b="13786">issues.</wd>

<space/>

<wd l="2112" t="13632" r="2424" b="13786">For</wd>

<space/>

<wd l="2539" t="13675" r="2635" b="13786">a</wd>

<space/>

<wd l="2746" t="13627" r="3355" b="13829">variety</wd>

<space/>

<wd l="3470" t="13627" r="3672" b="13786">of</wd>

<space/>

<wd l="3763" t="13675" r="4474" b="13819">reasons,</wd>

<space/>

<wd l="4598" t="13646" r="5774" b="13829">governments,</wd>

<space/>

</ln>

<ln l="1426" t="13882" r="5784" b="14083" baseLine="14030" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="13882" r="2410" b="14083">companies,</wd>

<space/>

<wd l="2510" t="13882" r="3235" b="14083">political</wd>

<space/>

<wd l="3346" t="13882" r="4320" b="14074">candidates,</wd>

<space/>

<wd l="4430" t="13882" r="4752" b="14040">and</wd>

<space/>

<wd l="4853" t="13930" r="5784" b="14040">consumers</wd>

<space/>

</ln>

<ln l="1421" t="14136" r="5808" b="14338" baseLine="14285" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="14155" r="1848" b="14294">want</wd>

<space/>

<wd l="1930" t="14155" r="2098" b="14294">to</wd>

<space/>

<wd l="2194" t="14136" r="2846" b="14338">explore</wd>

<space/>

<wd l="2942" t="14136" r="3336" b="14294">such</wd>

<space/>

<wd l="3422" t="14136" r="3782" b="14294">web</wd>

<space/>

<wd l="3878" t="14155" r="4565" b="14294">content.</wd>

<space/>

<wd l="4666" t="14136" r="5050" b="14294">This</wd>

<space/>

<wd l="5141" t="14155" r="5515" b="14338">type</wd>

<space/>

<wd l="5611" t="14136" r="5808" b="14294">of</wd>

<space/>

</ln>

<ln l="1421" t="14386" r="5784" b="14587" baseLine="14539" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="14405" r="1747" b="14544">text</wd>

<space/>

<wd l="1824" t="14386" r="1963" b="14544">is</wd>

<space/>

<wd l="2035" t="14386" r="2736" b="14544">referred</wd>

<space/>

<wd l="2798" t="14405" r="2966" b="14544">to</wd>

<space/>

<wd l="3043" t="14386" r="3211" b="14539">in</wd>

<space/>

<wd l="3283" t="14386" r="3547" b="14544">the</wd>

<space/>

<wd l="3624" t="14386" r="4411" b="14544">literature</wd>

<space/>

<wd l="4488" t="14434" r="4666" b="14544">as</wd>

<space/>

<wd l="4738" t="14386" r="5198" b="14544">UGC</wd>

<space/>

<wd l="5280" t="14386" r="5784" b="14587">(user-</wd>

</ln>

<ln l="1426" t="14640" r="5779" b="14842" baseLine="14794" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="14640" r="2280" b="14842">generated</wd>

<space/>

<wd l="2328" t="14640" r="3034" b="14842">content)</wd>

<space/>

<wd l="3096" t="14688" r="3274" b="14798">or</wd>

<space/>

<wd l="3322" t="14645" r="3960" b="14798">EWoM</wd>

<space/>

<wd l="4022" t="14640" r="4949" b="14842">(electronic</wd>

<space/>

<wd l="5002" t="14640" r="5779" b="14798">word-of-</wd>

</ln>

<ln l="1421" t="14894" r="5789" b="15096" baseLine="15043" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="14894" r="2098" b="15096">mouth).</wd>

<space/>

<wd l="2160" t="14899" r="3014" b="15086">However,</wd>

<space/>

<wd l="3077" t="14894" r="3389" b="15053">due</wd>

<space/>

<wd l="3442" t="14914" r="3610" b="15053">to</wd>

<space/>

<wd l="3662" t="14894" r="3926" b="15053">the</wd>

<space/>

<wd l="3984" t="14894" r="4421" b="15096">large</wd>

<space/>

<wd l="4478" t="14914" r="5131" b="15053">amount</wd>

<space/>

<wd l="5189" t="14894" r="5390" b="15053">of</wd>

<space/>

<wd l="5424" t="14894" r="5789" b="15053">data</wd>

<space/>

</ln>

<ln l="1426" t="15144" r="5784" b="15346" baseLine="15298" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="15144" r="2261" b="15336">available,</wd>

<space/>

<wd l="2347" t="15144" r="2462" b="15302">it</wd>

<space/>

<wd l="2539" t="15144" r="2678" b="15302">is</wd>

<space/>

<wd l="2760" t="15144" r="3706" b="15346">impossible</wd>

<space/>

<wd l="3787" t="15144" r="4042" b="15302">for</wd>

<space/>

<wd l="4109" t="15144" r="4786" b="15302">humans</wd>

<space/>

<wd l="4862" t="15163" r="5030" b="15302">to</wd>

<space/>

<wd l="5117" t="15144" r="5784" b="15346">analyze</wd>

</ln>

</para>

</column>

<column l="6120" t="5070" r="10517" b="15356">

<para l="6120" t="5112" r="10512" b="8352" alignment="justified" lsp="exactly" lspExact="252" language="en">

<ln l="6130" t="5112" r="10483" b="5304" baseLine="5266" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="5112" r="6341" b="5270">all</wd>

<space/>

<wd l="6442" t="5112" r="7229" b="5270">available</wd>

<space/>

<wd l="7325" t="5112" r="7786" b="5270">UGC</wd>

<space/>

<wd l="7882" t="5112" r="8141" b="5270">for</wd>

<space/>

<wd l="8227" t="5131" r="8654" b="5270">most</wd>

<space/>

<wd l="8750" t="5112" r="9312" b="5270">issues.</wd>

<space/>

<wd l="9418" t="5112" r="9658" b="5270">As</wd>

<space/>

<wd l="9758" t="5160" r="9854" b="5270">a</wd>

<space/>

<wd l="9941" t="5112" r="10483" b="5304">result,</wd>

<space/>

</ln>

<ln l="6120" t="5366" r="10507" b="5568" baseLine="5515" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="5366" r="7066" b="5568">processing</wd>

<space/>

<wd l="7138" t="5366" r="7454" b="5525">and</wd>

<space/>

<wd l="7526" t="5366" r="8376" b="5568">analyzing</wd>

<space/>

<wd l="8443" t="5366" r="8904" b="5525">UGC</wd>

<space/>

<wd l="8971" t="5366" r="9643" b="5525">became</wd>

<space/>

<wd l="9720" t="5414" r="9816" b="5525">a</wd>

<space/>

<wd l="9878" t="5366" r="10243" b="5525">task</wd>

<space/>

<wd l="10310" t="5366" r="10507" b="5525">of</wd>

<space/>

</ln>

<ln l="6120" t="5621" r="10488" b="5822" baseLine="5770" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="5626" r="6533" b="5779">NLP</wd>

<space/>

<wd l="6778" t="5621" r="7498" b="5822">(Natural</wd>

<space/>

<wd l="7733" t="5626" r="8597" b="5822">Language</wd>

<space/>

<wd l="8832" t="5621" r="9902" b="5822">Processing).</wd>

<space/>

<wd l="10147" t="5621" r="10488" b="5779">The</wd>

<space/>

</ln>

<ln l="6120" t="5870" r="10483" b="6072" baseLine="6024" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="5870" r="6859" b="6072">problem</wd>

<space/>

<wd l="6936" t="5870" r="7075" b="6029">is</wd>

<space/>

<wd l="7157" t="5870" r="7536" b="6062">that,</wd>

<space/>

<wd l="7618" t="5870" r="8016" b="6029">until</wd>

<space/>

<wd l="8098" t="5918" r="8525" b="6062">now,</wd>

<space/>

<wd l="8611" t="5870" r="9197" b="6029">almost</wd>

<space/>

<wd l="9278" t="5870" r="9490" b="6029">all</wd>

<space/>

<wd l="9566" t="5875" r="9979" b="6029">NLP</wd>

<space/>

<wd l="10056" t="5870" r="10483" b="6029">tools</wd>

<space/>

</ln>

<ln l="6130" t="6125" r="10483" b="6326" baseLine="6274" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="6125" r="6446" b="6283">and</wd>

<space/>

<wd l="6552" t="6125" r="7488" b="6326">techniques</wd>

<space/>

<wd l="7603" t="6173" r="8030" b="6283">were</wd>

<space/>

<wd l="8146" t="6125" r="9053" b="6326">developed</wd>

<space/>

<wd l="9158" t="6125" r="9634" b="6317">from,</wd>

<space/>

<wd l="9754" t="6125" r="10075" b="6283">and</wd>

<space/>

<wd l="10186" t="6125" r="10483" b="6317">for,</wd>

<space/>

</ln>

<ln l="6134" t="6379" r="10488" b="6581" baseLine="6528" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="6379" r="6878" b="6538">standard</wd>

<space/>

<wd l="6931" t="6379" r="7718" b="6581">language</wd>

<space/>

<wd l="7771" t="6398" r="8155" b="6571">text,</wd>

<space/>

<wd l="8213" t="6379" r="8496" b="6538">but</wd>

<space/>

<wd l="8549" t="6379" r="9010" b="6538">UGC</wd>

<space/>

<wd l="9072" t="6379" r="9787" b="6581">displays</wd>

<space/>

<wd l="9854" t="6427" r="9950" b="6538">a</wd>

<space/>

<wd l="10003" t="6427" r="10488" b="6581">range</wd>

<space/>

</ln>

<ln l="6130" t="6629" r="10493" b="6830" baseLine="6782" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="6629" r="6331" b="6787">of</wd>

<space/>

<wd l="6408" t="6629" r="7094" b="6787">creative</wd>

<space/>

<wd l="7195" t="6629" r="7512" b="6787">and</wd>

<space/>

<wd l="7603" t="6629" r="8731" b="6830">idiosyncratic</wd>

<space/>

<wd l="8832" t="6629" r="9854" b="6821">differences,</wd>

<space/>

<wd l="9955" t="6629" r="10493" b="6787">which</wd>

<space/>

</ln>

<ln l="6125" t="6883" r="10488" b="7085" baseLine="7037" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="6902" r="6931" b="7085">represent</wd>

<space/>

<wd l="7037" t="6883" r="7502" b="7042">noise</wd>

<space/>

<wd l="7618" t="6883" r="7872" b="7042">for</wd>

<space/>

<wd l="7973" t="6888" r="8386" b="7042">NLP</wd>

<space/>

<wd l="8496" t="6931" r="9331" b="7085">purposes.</wd>

<space/>

<wd l="9456" t="6888" r="9634" b="7037">In</wd>

<space/>

<wd l="9749" t="6883" r="10214" b="7042">order</wd>

<space/>

<wd l="10320" t="6902" r="10488" b="7042">to</wd>

<space/>

</ln>

<ln l="6125" t="7138" r="10488" b="7339" baseLine="7286" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="7186" r="6586" b="7296">reuse</wd>

<space/>

<wd l="6758" t="7138" r="7022" b="7296">the</wd>

<space/>

<wd l="7190" t="7142" r="7603" b="7296">NLP</wd>

<space/>

<wd l="7771" t="7138" r="8198" b="7296">tools</wd>

<space/>

<wd l="8366" t="7157" r="8534" b="7296">to</wd>

<space/>

<wd l="8702" t="7186" r="9365" b="7339">process</wd>

<space/>

<wd l="9533" t="7138" r="10046" b="7330">UGC,</wd>

<space/>

<wd l="10219" t="7138" r="10488" b="7296">the</wd>

<space/>

</ln>

<ln l="6125" t="7392" r="10512" b="7594" baseLine="7541" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="7392" r="7344" b="7550">normalization</wd>

<space/>

<wd l="7426" t="7440" r="7603" b="7550">or</wd>

<space/>

<wd l="7685" t="7392" r="9019" b="7550">standardization</wd>

<space/>

<wd l="9101" t="7392" r="9298" b="7550">of</wd>

<space/>

<wd l="9350" t="7392" r="9667" b="7550">this</wd>

<space/>

<wd l="9749" t="7440" r="10234" b="7594">genre</wd>

<space/>

<wd l="10310" t="7392" r="10512" b="7550">of</wd>

<space/>

</ln>

<ln l="6125" t="7642" r="10483" b="7843" baseLine="7795" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="7661" r="6456" b="7800">text</wd>

<space/>

<wd l="6605" t="7642" r="7277" b="7800">became</wd>

<space/>

<wd l="7435" t="7690" r="7632" b="7800">an</wd>

<space/>

<wd l="7795" t="7642" r="8544" b="7800">essential</wd>

<space/>

<wd l="8698" t="7642" r="9926" b="7843">preprocessing</wd>

<space/>

<wd l="10085" t="7661" r="10483" b="7843">step,</wd>

<space/>

</ln>

<ln l="6130" t="7896" r="10488" b="8098" baseLine="8045" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="7896" r="6734" b="8098">aiming</wd>

<space/>

<wd l="6850" t="7915" r="7018" b="8054">to</wd>

<space/>

<wd l="7138" t="7896" r="7608" b="8054">make</wd>

<space/>

<wd l="7733" t="7896" r="8194" b="8054">UGC</wd>

<space/>

<wd l="8318" t="7944" r="8491" b="8054">as</wd>

<space/>

<wd l="8616" t="7896" r="9062" b="8054">close</wd>

<space/>

<wd l="9187" t="7944" r="9360" b="8054">as</wd>

<space/>

<wd l="9475" t="7896" r="10200" b="8098">possible</wd>

<space/>

<wd l="10320" t="7915" r="10488" b="8054">to</wd>

<space/>

</ln>

<ln l="6134" t="8150" r="7757" b="8352" baseLine="8299" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="8150" r="6878" b="8309">standard</wd>

<space/>

<wd l="6931" t="8150" r="7757" b="8352">language.</wd>

</ln>

</para>

<para l="6120" t="8400" r="10493" b="11131" alignment="justified" spaceBefore="1" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="6355" t="8400" r="10488" b="8602" baseLine="8554" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="8400" r="6696" b="8558">The</wd>

<space/>

<wd l="6754" t="8400" r="7171" b="8558">level</wd>

<space/>

<wd l="7234" t="8400" r="7435" b="8558">of</wd>

<space/>

<wd l="7464" t="8400" r="7925" b="8558">noise</wd>

<space/>

<wd l="7982" t="8400" r="8150" b="8554">in</wd>

<space/>

<wd l="8203" t="8400" r="8664" b="8558">UGC</wd>

<space/>

<wd l="8722" t="8400" r="9245" b="8558">varies</wd>

<space/>

<wd l="9307" t="8400" r="10219" b="8602">depending</wd>

<space/>

<wd l="10272" t="8448" r="10488" b="8558">on</wd>

<space/>

</ln>

<ln l="6125" t="8654" r="10493" b="8856" baseLine="8808" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="8654" r="6394" b="8813">the</wd>

<space/>

<wd l="6523" t="8654" r="7022" b="8813">social</wd>

<space/>

<wd l="7147" t="8654" r="7685" b="8813">media</wd>

<space/>

<wd l="7810" t="8654" r="7973" b="8808">in</wd>

<space/>

<wd l="8093" t="8654" r="8630" b="8813">which</wd>

<space/>

<wd l="8760" t="8654" r="8880" b="8813">it</wd>

<space/>

<wd l="9000" t="8654" r="9139" b="8813">is</wd>

<space/>

<wd l="9259" t="8654" r="9883" b="8856">posted.</wd>

<space/>

<wd l="10022" t="8654" r="10493" b="8813">Short</wd>

<space/>

</ln>

<ln l="6125" t="8909" r="10483" b="9110" baseLine="9058" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="8957" r="6946" b="9110">messages</wd>

<space/>

<wd l="7013" t="8909" r="7517" b="9110">(SMS</wd>

<space/>

<wd l="7589" t="8909" r="7906" b="9067">and</wd>

<space/>

<wd l="7958" t="8909" r="8995" b="9110">microblogs,</wd>

<space/>

<wd l="9072" t="8909" r="9470" b="9067">such</wd>

<space/>

<wd l="9533" t="8957" r="9706" b="9067">as</wd>

<space/>

<wd l="9773" t="8909" r="10483" b="9110">Twitter)</wd>

<space/>

</ln>

<ln l="6125" t="9158" r="10483" b="9360" baseLine="9312" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="9158" r="6509" b="9317">tend</wd>

<space/>

<wd l="6562" t="9178" r="6725" b="9317">to</wd>

<space/>

<wd l="6782" t="9158" r="6994" b="9317">be</wd>

<space/>

<wd l="7046" t="9158" r="7536" b="9317">much</wd>

<space/>

<wd l="7594" t="9158" r="8198" b="9317">noisier</wd>

<space/>

<wd l="8246" t="9158" r="8621" b="9317">than</wd>

<space/>

<wd l="8678" t="9178" r="9091" b="9317">texts</wd>

<space/>

<wd l="9149" t="9158" r="9734" b="9360">posted</wd>

<space/>

<wd l="9787" t="9158" r="9955" b="9312">in</wd>

<space/>

<wd l="10008" t="9158" r="10483" b="9360">blogs</wd>

<space/>

</ln>

<ln l="6130" t="9413" r="10483" b="9605" baseLine="9566" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="9413" r="6446" b="9571">and</wd>

<space/>

<wd l="6518" t="9413" r="6898" b="9571">sites</wd>

<space/>

<wd l="6974" t="9413" r="7171" b="9571">of</wd>

<space/>

<wd l="7219" t="9413" r="7954" b="9605">reviews,</wd>

<space/>

<wd l="8030" t="9461" r="8208" b="9571">as</wd>

<space/>

<wd l="8280" t="9461" r="8726" b="9571">users</wd>

<space/>

<wd l="8798" t="9413" r="9216" b="9571">need</wd>

<space/>

<wd l="9278" t="9432" r="9446" b="9571">to</wd>

<space/>

<wd l="9514" t="9413" r="9725" b="9571">be</wd>

<space/>

<wd l="9797" t="9413" r="10483" b="9571">creative</wd>

<space/>

</ln>

<ln l="6125" t="9667" r="10488" b="9869" baseLine="9816" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="9686" r="6293" b="9826">to</wd>

<space/>

<wd l="6365" t="9667" r="6725" b="9826">deal</wd>

<space/>

<wd l="6792" t="9667" r="7186" b="9826">with</wd>

<space/>

<wd l="7253" t="9667" r="8059" b="9826">character</wd>

<space/>

<wd l="8126" t="9667" r="9062" b="9826">limitations</wd>

<space/>

<wd l="9134" t="9667" r="9528" b="9869">(140</wd>

<space/>

<wd l="9605" t="9667" r="10488" b="9826">characters</wd>

<space/>

</ln>

<ln l="6130" t="9922" r="10493" b="10123" baseLine="10070" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="9922" r="6384" b="10080">for</wd>

<space/>

<wd l="6451" t="9922" r="7099" b="10080">Twitter</wd>

<space/>

<wd l="7166" t="9922" r="7488" b="10080">and</wd>

<space/>

<wd l="7574" t="9922" r="7877" b="10080">160</wd>

<space/>

<wd l="7954" t="9922" r="8208" b="10080">for</wd>

<space/>

<wd l="8285" t="9922" r="8832" b="10123">SMS).</wd>

<space/>

<wd l="8914" t="9922" r="9254" b="10080">The</wd>

<space/>

<wd l="9326" t="9922" r="10162" b="10123">challenge</wd>

<space/>

<wd l="10238" t="9922" r="10493" b="10080">for</wd>

<space/>

</ln>

<ln l="6120" t="10171" r="10488" b="10373" baseLine="10325" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="10176" r="6533" b="10330">NLP</wd>

<space/>

<wd l="6629" t="10171" r="6768" b="10330">is</wd>

<space/>

<wd l="6854" t="10190" r="7022" b="10330">to</wd>

<space/>

<wd l="7118" t="10171" r="7987" b="10330">determine</wd>

<space/>

<wd l="8074" t="10171" r="8342" b="10330">the</wd>

<space/>

<wd l="8434" t="10190" r="9062" b="10373">aspects</wd>

<space/>

<wd l="9154" t="10171" r="9322" b="10325">in</wd>

<space/>

<wd l="9403" t="10171" r="9941" b="10330">which</wd>

<space/>

<wd l="10027" t="10171" r="10488" b="10330">UGC</wd>

<space/>

</ln>

<ln l="6130" t="10426" r="10483" b="10627" baseLine="10574" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="10426" r="6840" b="10584">deviates</wd>

<space/>

<wd l="6984" t="10426" r="7406" b="10584">from</wd>

<space/>

<wd l="7546" t="10426" r="8290" b="10584">standard</wd>

<space/>

<wd l="8419" t="10426" r="9202" b="10627">language</wd>

<space/>

<wd l="9341" t="10426" r="9662" b="10584">and</wd>

<space/>

<wd l="9792" t="10426" r="10483" b="10627">develop</wd>

<space/>

</ln>

<ln l="6134" t="10680" r="10483" b="10882" baseLine="10829" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="10680" r="6950" b="10882">strategies</wd>

<space/>

<wd l="7022" t="10699" r="7190" b="10838">to</wd>

<space/>

<wd l="7267" t="10680" r="7627" b="10838">deal</wd>

<space/>

<wd l="7699" t="10680" r="8088" b="10838">with</wd>

<space/>

<wd l="8155" t="10680" r="8424" b="10838">the</wd>

<space/>

<wd l="8491" t="10680" r="9715" b="10838">normalization</wd>

<space/>

<wd l="9792" t="10680" r="9989" b="10838">of</wd>

<space/>

<wd l="10037" t="10680" r="10483" b="10838">these</wd>

<space/>

</ln>

<ln l="6130" t="10934" r="6802" b="11131" baseLine="11083" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="10949" r="6802" b="11131">aspects.</wd>

</ln>

</para>

<para l="6120" t="11184" r="10512" b="13157" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="6350" t="11184" r="10498" b="11386" baseLine="11338">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="6350" t="11189" r="6869" b="11386">Many</wd>

<space/>

<wd l="7018" t="11184" r="7219" b="11342">of</wd>

<space/>

</run>

<wd l="7344" t="11184" r="7963" b="11342"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">UGC</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">’s</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="8122" t="11184" r="9019" b="11342">deviations</wd>

<space/>

<wd l="9178" t="11184" r="9600" b="11342">from</wd>

<space/>

<wd l="9754" t="11184" r="10498" b="11342">standard</wd>

<space/>

</run>

</ln>

<ln l="6130" t="11438" r="10483" b="11640" baseLine="11587" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="11438" r="6917" b="11640">language</wd>

<space/>

<wd l="7051" t="11486" r="7310" b="11597">are</wd>

<space/>

<wd l="7440" t="11438" r="8323" b="11597">motivated</wd>

<space/>

<wd l="8438" t="11438" r="8664" b="11640">by</wd>

<space/>

<wd l="8789" t="11438" r="9624" b="11640">wordplay</wd>

<space/>

<wd l="9754" t="11438" r="10483" b="11640">(U=you,</wd>

<space/>

</ln>

<ln l="6125" t="11688" r="10483" b="11890" baseLine="11842" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="11688" r="6734" b="11890">4=for),</wd>

<space/>

<wd l="6792" t="11688" r="7018" b="11890">by</wd>

<space/>

<wd l="7070" t="11688" r="7339" b="11846">the</wd>

<space/>

<wd l="7397" t="11688" r="7819" b="11846">need</wd>

<space/>

<wd l="7867" t="11707" r="8035" b="11846">to</wd>

<space/>

<wd l="8107" t="11736" r="8486" b="11846">save</wd>

<space/>

<wd l="8554" t="11736" r="9034" b="11890">space</wd>

<space/>

<wd l="9091" t="11688" r="9605" b="11890">(short</wd>

<space/>

<wd l="9658" t="11736" r="10483" b="11890">messages</wd>

<space/>

</ln>

<ln l="6125" t="11942" r="10512" b="12144" baseLine="12096" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="11942" r="6538" b="12101">have</wd>

<space/>

<wd l="6696" t="11990" r="6792" b="12101">a</wd>

<space/>

<wd l="6941" t="11942" r="7570" b="12101">limited</wd>

<space/>

<wd l="7718" t="11942" r="8386" b="12144">length),</wd>

<space/>

<wd l="8534" t="11942" r="8760" b="12144">by</wd>

<space/>

<wd l="8914" t="11942" r="9182" b="12101">the</wd>

<space/>

<wd l="9341" t="11942" r="10152" b="12101">influence</wd>

<space/>

<wd l="10310" t="11942" r="10512" b="12101">of</wd>

<space/>

</ln>

<ln l="6120" t="12197" r="10474" b="12398" baseLine="12346" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6120" t="12197" r="7397" b="12398">pronunciation,</wd>

<space/>

<wd l="7469" t="12245" r="7651" b="12355">or</wd>

<space/>

<wd l="7718" t="12245" r="8126" b="12355">even</wd>

<space/>

<wd l="8189" t="12197" r="8414" b="12398">by</wd>

<space/>

<wd l="8482" t="12245" r="8578" b="12355">a</wd>

<space/>

<wd l="8645" t="12197" r="8971" b="12355">low</wd>

<space/>

<wd l="9038" t="12197" r="9456" b="12355">level</wd>

<space/>

<wd l="9533" t="12197" r="9734" b="12355">of</wd>

<space/>

<wd l="9778" t="12197" r="10474" b="12398">literacy.</wd>

<space/>

</ln>

<ln l="6125" t="12451" r="10493" b="12653" baseLine="12600" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="12451" r="7085" b="12653">Regardless</wd>

<space/>

<wd l="7152" t="12451" r="7354" b="12610">of</wd>

<space/>

<wd l="7387" t="12451" r="7651" b="12610">the</wd>

<space/>

<wd l="7718" t="12499" r="8280" b="12610">causes</wd>

<space/>

<wd l="8347" t="12451" r="8549" b="12610">of</wd>

<space/>

<wd l="8578" t="12451" r="9038" b="12610">UGC</wd>

<space/>

<wd l="9106" t="12451" r="10003" b="12610">deviations</wd>

<space/>

<wd l="10066" t="12451" r="10493" b="12610">from</wd>

<space/>

</ln>

<ln l="6134" t="12701" r="10493" b="12902" baseLine="12854" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6134" t="12701" r="6878" b="12859">standard</wd>

<space/>

<wd l="6926" t="12701" r="7757" b="12902">language,</wd>

<space/>

<wd l="7824" t="12701" r="7978" b="12854">if</wd>

<space/>

<wd l="8002" t="12701" r="8381" b="12902">they</wd>

<space/>

<wd l="8434" t="12749" r="8698" b="12859">are</wd>

<space/>

<wd l="8746" t="12720" r="9590" b="12893">recurrent,</wd>

<space/>

<wd l="9643" t="12701" r="10022" b="12902">they</wd>

<space/>

<wd l="10070" t="12701" r="10493" b="12859">need</wd>

<space/>

</ln>

<ln l="6125" t="12955" r="9974" b="13157" baseLine="13104" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="12974" r="6293" b="13114">to</wd>

<space/>

<wd l="6346" t="12955" r="6557" b="13114">be</wd>

<space/>

<wd l="6619" t="12955" r="7483" b="13114">addressed</wd>

<space/>

<wd l="7531" t="12955" r="7757" b="13157">by</wd>

<space/>

<wd l="7810" t="12955" r="9034" b="13114">normalization</wd>

<space/>

<wd l="9082" t="13003" r="9974" b="13157">processes.</wd>

</ln>

</para>

<para l="6125" t="13210" r="10512" b="15182" alignment="justified" spaceBefore="5" spaceAfter="159" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="6360" t="13210" r="10483" b="13411" baseLine="13358" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6360" t="13210" r="6850" b="13368">Some</wd>

<space/>

<wd l="6974" t="13210" r="8227" b="13368">characteristics</wd>

<space/>

<wd l="8352" t="13210" r="8554" b="13368">of</wd>

<space/>

<wd l="8650" t="13210" r="9110" b="13368">UGC</wd>

<space/>

<wd l="9235" t="13258" r="9499" b="13368">are</wd>

<space/>

<wd l="9624" t="13210" r="10483" b="13411">language-</wd>

</ln>

<ln l="6130" t="13459" r="10483" b="13661" baseLine="13613" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="13459" r="7248" b="13661">independent,</wd>

<space/>

<wd l="7334" t="13507" r="7507" b="13618">as</wd>

<space/>

<wd l="7589" t="13459" r="7858" b="13618">the</wd>

<space/>

<wd l="7939" t="13459" r="8328" b="13661">long</wd>

<space/>

<wd l="8400" t="13459" r="9019" b="13618">vowels</wd>

<space/>

<wd l="9101" t="13459" r="9509" b="13618">used</wd>

<space/>

<wd l="9581" t="13478" r="9744" b="13618">to</wd>

<space/>

<wd l="9830" t="13507" r="10483" b="13661">express</wd>

<space/>

</ln>

<ln l="6130" t="13714" r="10488" b="13915" baseLine="13867" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="13714" r="6941" b="13915">emphasis</wd>

<space/>

<wd l="7282" t="13714" r="9226" b="13915">(Gooooooooooooood)</wd>

<space/>

<wd l="9571" t="13714" r="9888" b="13872">and</wd>

<space/>

<wd l="10219" t="13714" r="10488" b="13872">the</wd>

<space/>

</ln>

<ln l="6125" t="13968" r="10483" b="14170" baseLine="14117" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="13968" r="7478" b="14126">unconventional</wd>

<space/>

<wd l="7608" t="14016" r="7896" b="14126">use</wd>

<space/>

<wd l="8026" t="13968" r="8227" b="14126">of</wd>

<space/>

<wd l="8333" t="13968" r="8832" b="14126">lower</wd>

<space/>

<wd l="8962" t="13968" r="9278" b="14126">and</wd>

<space/>

<wd l="9398" t="14016" r="9902" b="14170">upper</wd>

<space/>

<wd l="10027" t="14016" r="10483" b="14126">cases</wd>

<space/>

</ln>

<ln l="6130" t="14222" r="10488" b="14424" baseLine="14371" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="14222" r="6773" b="14424">(proper</wd>

<space/>

<wd l="6816" t="14270" r="7373" b="14381">names</wd>

<space/>

<wd l="7426" t="14222" r="7594" b="14376">in</wd>

<space/>

<wd l="7642" t="14222" r="8515" b="14381">lowercase</wd>

<space/>

<wd l="8563" t="14222" r="8880" b="14381">and</wd>

<space/>

<wd l="8923" t="14270" r="9686" b="14381">common</wd>

<space/>

<wd l="9730" t="14222" r="10267" b="14381">words</wd>

<space/>

<wd l="10320" t="14222" r="10488" b="14376">in</wd>

<space/>

</ln>

<ln l="6125" t="14472" r="10483" b="14674" baseLine="14626" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="14472" r="7123" b="14674">uppercase).</wd>

<space/>

<wd l="7253" t="14472" r="7752" b="14630">Other</wd>

<space/>

<wd l="7867" t="14472" r="9120" b="14630">characteristics</wd>

<space/>

<wd l="9245" t="14520" r="9504" b="14630">are</wd>

<space/>

<wd l="9624" t="14472" r="10483" b="14674">language-</wd>

</ln>

<ln l="6130" t="14726" r="10488" b="14928" baseLine="14875" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="14726" r="7075" b="14928">dependent,</wd>

<space/>

<wd l="7157" t="14726" r="7550" b="14885">such</wd>

<space/>

<wd l="7618" t="14774" r="7790" b="14885">as</wd>

<space/>

<wd l="7858" t="14726" r="8126" b="14885">the</wd>

<space/>

<wd l="8194" t="14726" r="9154" b="14928">apostrophe</wd>

<space/>

<wd l="9226" t="14726" r="10258" b="14928">suppression</wd>

<space/>

<wd l="10320" t="14726" r="10488" b="14880">in</wd>

<space/>

</ln>

<ln l="6125" t="14976" r="10512" b="15182" baseLine="15130">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6125" t="14981" r="6797" b="15182">English</wd>

<space/>

<wd l="6984" t="14981" r="8194" b="15182">(wont=won’t)</wd>

<space/>

<wd l="8386" t="14981" r="8702" b="15139">and</wd>

<space/>

<wd l="8880" t="14981" r="9149" b="15139">the</wd>

<space/>

</run>

<wd l="9336" t="14981" r="10128" b="15139"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">omis</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">sion</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="10315" t="14981" r="10512" b="15139">of</wd>

</run>

</ln>

</para>

</column>

</section>

<section l="1416" t="15356" r="10517" b="16480">

<column l="1416" t="15356" r="10517" b="16480">

<para l="5804" t="15792" r="6143" b="15946" alignment="centered" spaceBefore="388" lsp="exactly" lspExact="249" language="en">

<ln l="5870" t="15792" r="6077" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="39">

<wd l="5870" t="15792" r="6077" b="15946">38</wd>

</ln>

</para>

<para l="2918" t="16133" r="8981" b="16469" alignment="centered" spaceBefore="139" lsp="exactly" lspExact="170" language="en">

<ln l="2918" t="16133" r="8981" b="16301" baseLine="16253" forcedEOF="true">

<run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2918" t="16133" r="3802" b="16296">Proceedings</wd>

<space/>

<wd l="3854" t="16133" r="4018" b="16296">of</wd>

<space/>

<wd l="4037" t="16133" r="4248" b="16262">the</wd>

<space/>

<wd l="4286" t="16138" r="4622" b="16262">ACL</wd>

<space/>

<wd l="4666" t="16133" r="5026" b="16262">2015</wd>

<space/>

<wd l="5078" t="16133" r="5779" b="16296">Workshop</wd>

<space/>

<wd l="5832" t="16176" r="6000" b="16262">on</wd>

<space/>

<wd l="6043" t="16138" r="6456" b="16296">Noisy</wd>

<space/>

<wd l="6518" t="16133" r="7627" b="16296">User-generated</wd>

<space/>

</run>

<wd l="7675" t="16138" r="7992" b="16286"><run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Text</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="8045" t="16171" r="8443" b="16301">pages</wd>

<space/>

<wd l="8501" t="16133" r="8981" b="16286">38–47,
</wd>

</run>

</ln>

<ln l="3029" t="16296" r="8870" b="16469" baseLine="16425" forcedEOF="true">

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3029" t="16301" r="3595" b="16469">Beijing,</wd>

<space/>

<wd l="3653" t="16301" r="4114" b="16454">China,</wd>

<space/>

<wd l="4166" t="16301" r="4459" b="16469">July</wd>

<space/>

<wd l="4512" t="16301" r="4723" b="16454">31,</wd>

<space/>

<wd l="4781" t="16301" r="5170" b="16430">2015.</wd>

<space/>

</run>

<wd l="5246" t="16296" r="5770" b="16469"><run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">c</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">�</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">2015</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="5822" t="16301" r="6672" b="16430">Association</wd>

<space/>

<wd l="6715" t="16301" r="6926" b="16430">for</wd>

<space/>

<wd l="6974" t="16301" r="8035" b="16469">Computational</wd>

<space/>

<wd l="8078" t="16301" r="8870" b="16469">Linguistics</wd>

</run>

</ln>

</para>

</column>

</section>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4305.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1416" marginTop="1417" marginRight="1392" marginBottom="1302" offsetX="-8" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1416" t="1417" r="10517" b="15430">

<column l="1416" t="1417" r="5813" b="15430">

<para l="1426" t="1464" r="5779" b="1920" alignment="justified" spaceBefore="2" lsp="exactly" lspExact="253" language="en">

<ln l="1426" t="1464" r="5779" b="1666" baseLine="1613" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="1464" r="2222" b="1622">diacritics</wd>

<space/>

<wd l="2347" t="1464" r="2669" b="1622">and</wd>

<space/>

<wd l="2784" t="1464" r="3370" b="1622">cedilla</wd>

<space/>

<wd l="3485" t="1464" r="3989" b="1622">under</wd>

<space/>

<wd l="4109" t="1464" r="4397" b="1622">“c”</wd>

<space/>

<wd l="4522" t="1464" r="4690" b="1618">in</wd>

<space/>

<wd l="4805" t="1469" r="5779" b="1666">Portuguese</wd>

<space/>

</ln>

<ln l="1426" t="1718" r="2981" b="1920" baseLine="1867" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="1718" r="2981" b="1920">(eleicao=eleição).</wd>

</ln>

</para>

<para l="1421" t="1968" r="5803" b="3941" alignment="justified" spaceBefore="2" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="1646" t="1968" r="5789" b="2170" baseLine="2122" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1646" t="1968" r="2107" b="2126">UGC</wd>

<space/>

<wd l="2155" t="1968" r="2722" b="2126">differs</wd>

<space/>

<wd l="2774" t="1968" r="3202" b="2126">from</wd>

<space/>

<wd l="3240" t="1968" r="3509" b="2126">the</wd>

<space/>

<wd l="3562" t="1968" r="4301" b="2126">standard</wd>

<space/>

<wd l="4344" t="1968" r="5131" b="2170">language</wd>

<space/>

<wd l="5174" t="1968" r="5789" b="2170">mainly</wd>

<space/>

</ln>

<ln l="1426" t="2222" r="5779" b="2414" baseLine="2376" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1426" t="2222" r="1589" b="2376">in</wd>

<space/>

<wd l="1781" t="2222" r="2050" b="2381">the</wd>

<space/>

<wd l="2246" t="2222" r="2822" b="2381">lexical</wd>

<space/>

<wd l="3019" t="2222" r="3490" b="2381">level.</wd>

<space/>

<wd l="3691" t="2227" r="3998" b="2381">For</wd>

<space/>

<wd l="4186" t="2222" r="4502" b="2381">this</wd>

<space/>

<wd l="4694" t="2270" r="5318" b="2414">reason,</wd>

<space/>

<wd l="5515" t="2222" r="5779" b="2381">the</wd>

<space/>

</ln>

<ln l="1421" t="2477" r="5784" b="2678" baseLine="2626" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1421" t="2477" r="2640" b="2635">normalization</wd>

<space/>

<wd l="2894" t="2477" r="3634" b="2678">problem</wd>

<space/>

<wd l="3893" t="2477" r="4032" b="2635">is</wd>

<space/>

<wd l="4296" t="2477" r="5309" b="2678">approached</wd>

<space/>

<wd l="5558" t="2477" r="5784" b="2678">by</wd>

<space/>

</ln>

<ln l="1430" t="2726" r="5803" b="2928" baseLine="2880" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1430" t="2726" r="2246" b="2928">strategies</wd>

<space/>

<wd l="2314" t="2726" r="2515" b="2885">of</wd>

<space/>

<wd l="2554" t="2726" r="3010" b="2885">word</wd>

<space/>

<wd l="3067" t="2726" r="3955" b="2885">correction</wd>

<space/>

<wd l="4022" t="2726" r="4354" b="2928">(the</wd>

<space/>

<wd l="4421" t="2726" r="5002" b="2885">lexical</wd>

<space/>

<wd l="5069" t="2726" r="5534" b="2885">items</wd>

<space/>

<wd l="5602" t="2726" r="5803" b="2885">of</wd>

<space/>

</ln>

<ln l="1421" t="2981" r="5789" b="3182" baseLine="3134">

<wd l="1421" t="2981" r="1690" b="3139"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">th</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">e</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><space/>

<wd l="1742" t="2981" r="2198" b="3139">UGC</wd>

<space/>

<wd l="2256" t="3029" r="2520" b="3139">are</wd>

<space/>

<wd l="2568" t="2981" r="3173" b="3139">treated</wd>

<space/>

<wd l="3221" t="3029" r="3394" b="3139">as</wd>

<space/>

<wd l="3456" t="2981" r="4224" b="3182">“errors”)</wd>

<space/>

<wd l="4286" t="2981" r="4603" b="3139">and</wd>

<space/>

<wd l="4656" t="2981" r="5472" b="3182">strategies</wd>

<space/>

</run>

<wd l="5530" t="2981" r="5789" b="3139"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">f</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">or</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><space/>

</run>

</ln>

<ln l="1421" t="3235" r="5779" b="3437" baseLine="3384" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1421" t="3235" r="2160" b="3394">machine</wd>

<space/>

<wd l="2218" t="3235" r="3149" b="3394">translation</wd>

<space/>

<wd l="3206" t="3235" r="3542" b="3437">(the</wd>

<space/>

<wd l="3600" t="3235" r="4056" b="3394">UGC</wd>

<space/>

<wd l="4118" t="3235" r="4258" b="3394">is</wd>

<space/>

<wd l="4315" t="3235" r="4920" b="3394">treated</wd>

<space/>

<wd l="4978" t="3283" r="5150" b="3394">as</wd>

<space/>

<wd l="5218" t="3283" r="5779" b="3394">source</wd>

<space/>

</ln>

<ln l="1426" t="3490" r="5784" b="3691" baseLine="3638" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1426" t="3490" r="2213" b="3691">language</wd>

<space/>

<wd l="2342" t="3490" r="2664" b="3648">and</wd>

<space/>

<wd l="2784" t="3490" r="3053" b="3648">the</wd>

<space/>

<wd l="3187" t="3490" r="3931" b="3648">standard</wd>

<space/>

<wd l="4056" t="3490" r="4843" b="3691">language</wd>

<space/>

<wd l="4978" t="3538" r="5155" b="3648">as</wd>

<space/>

<wd l="5285" t="3509" r="5784" b="3691">target</wd>

<space/>

</ln>

<ln l="1426" t="3739" r="2328" b="3941" baseLine="3893" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1426" t="3739" r="2328" b="3941">language).</wd>

</ln>

</para>

<para l="1421" t="3994" r="5784" b="5712" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="1646" t="3994" r="5779" b="4195" baseLine="4142" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1646" t="3998" r="1949" b="4152">We</wd>

<space/>

<wd l="2006" t="3994" r="2654" b="4152">address</wd>

<space/>

<wd l="2707" t="3994" r="3259" b="4152">herein</wd>

<space/>

<wd l="3307" t="3994" r="3576" b="4152">the</wd>

<space/>

<wd l="3624" t="3994" r="4848" b="4152">normalization</wd>

<space/>

<wd l="4891" t="4042" r="5549" b="4195">process</wd>

<space/>

<wd l="5606" t="4042" r="5779" b="4152">as</wd>

<space/>

</ln>

<ln l="1426" t="4248" r="5784" b="4450" baseLine="4397" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="4296" r="1522" b="4406">a</wd>

<space/>

<wd l="1598" t="4267" r="1834" b="4406">set</wd>

<space/>

<wd l="1906" t="4248" r="2107" b="4406">of</wd>

<space/>

<wd l="2150" t="4248" r="3115" b="4450">procedures</wd>

<space/>

<wd l="3192" t="4248" r="3523" b="4406">that</wd>

<space/>

<wd l="3600" t="4248" r="3955" b="4406">deal</wd>

<space/>

<wd l="4032" t="4248" r="4421" b="4406">with</wd>

<space/>

<wd l="4493" t="4248" r="5246" b="4406">different</wd>

<space/>

<wd l="5318" t="4267" r="5784" b="4450">types</wd>

<space/>

</ln>

<ln l="1426" t="4498" r="5784" b="4699" baseLine="4651" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="4498" r="1627" b="4656">of</wd>

<space/>

<wd l="1738" t="4498" r="2592" b="4656">deviation.</wd>

<space/>

<wd l="2741" t="4498" r="3077" b="4656">The</wd>

<space/>

<wd l="3211" t="4498" r="3662" b="4699">input</wd>

<space/>

<wd l="3792" t="4498" r="4483" b="4656">consists</wd>

<space/>

<wd l="4622" t="4498" r="4824" b="4656">of</wd>

<space/>

<wd l="4934" t="4546" r="5784" b="4656">consumer</wd>

<space/>

</ln>

<ln l="1421" t="4752" r="5784" b="4954" baseLine="4906" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="4752" r="2102" b="4910">reviews</wd>

<space/>

<wd l="2160" t="4800" r="2376" b="4910">on</wd>

<space/>

<wd l="2429" t="4752" r="3283" b="4910">electronic</wd>

<space/>

<wd l="3336" t="4752" r="4142" b="4954">products.</wd>

<space/>

<wd l="4210" t="4752" r="4546" b="4910">The</wd>

<space/>

<wd l="4598" t="4752" r="5035" b="4910">main</wd>

<space/>

<wd l="5083" t="4800" r="5784" b="4954">purpose</wd>

<space/>

</ln>

<ln l="1426" t="5006" r="5784" b="5208" baseLine="5155" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="5006" r="1560" b="5165">is</wd>

<space/>

<wd l="1608" t="5026" r="1776" b="5165">to</wd>

<space/>

<wd l="1834" t="5026" r="2486" b="5165">convert</wd>

<space/>

<wd l="2539" t="5006" r="2938" b="5165">such</wd>

<space/>

<wd l="2981" t="5026" r="3446" b="5198">texts,</wd>

<space/>

<wd l="3499" t="5054" r="3677" b="5165">as</wd>

<space/>

<wd l="3730" t="5006" r="4349" b="5208">closely</wd>

<space/>

<wd l="4397" t="5054" r="4574" b="5165">as</wd>

<space/>

<wd l="4618" t="5006" r="5390" b="5208">possible,</wd>

<space/>

<wd l="5448" t="5006" r="5784" b="5165">into</wd>

<space/>

</ln>

<ln l="1421" t="5261" r="5784" b="5462" baseLine="5410" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="5261" r="1685" b="5419">the</wd>

<space/>

<wd l="1838" t="5261" r="2261" b="5419">form</wd>

<space/>

<wd l="2400" t="5261" r="3187" b="5462">expected</wd>

<space/>

<wd l="3317" t="5261" r="3542" b="5462">by</wd>

<space/>

<wd l="3677" t="5266" r="4090" b="5419">NLP</wd>

<space/>

<wd l="4238" t="5261" r="4666" b="5419">tools</wd>

<space/>

<wd l="4810" t="5261" r="5429" b="5419">trained</wd>

<space/>

<wd l="5568" t="5309" r="5784" b="5419">on</wd>

<space/>

</ln>

<ln l="1426" t="5510" r="4018" b="5712" baseLine="5664" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="5558" r="2093" b="5712">corpora</wd>

<space/>

<wd l="2150" t="5510" r="2352" b="5669">of</wd>

<space/>

<wd l="2395" t="5510" r="3134" b="5669">standard</wd>

<space/>

<wd l="3187" t="5510" r="4018" b="5712">language.</wd>

</ln>

</para>

<para l="1426" t="5765" r="5789" b="7738" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="1651" t="5765" r="5789" b="5966" baseLine="5914" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1651" t="5765" r="2035" b="5923">This</wd>

<space/>

<wd l="2117" t="5765" r="2573" b="5923">work</wd>

<space/>

<wd l="2645" t="5813" r="2986" b="5923">was</wd>

<space/>

<wd l="3062" t="5765" r="3864" b="5966">preceded</wd>

<space/>

<wd l="3936" t="5765" r="4157" b="5966">by</wd>

<space/>

<wd l="4234" t="5765" r="4502" b="5923">the</wd>

<space/>

<wd l="4589" t="5765" r="5390" b="5923">detection</wd>

<space/>

<wd l="5472" t="5765" r="5789" b="5923">and</wd>

<space/>

</ln>

<ln l="1426" t="5995" r="5789" b="6221" baseLine="6164">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1426" t="6019" r="2122" b="6221">analysis</wd>

<space/>

<wd l="2198" t="6019" r="2395" b="6178">of</wd>

<space/>

</run>

<wd l="2443" t="5995" r="4080" b="6221"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">out-of-vocabulary</run>

<run underlined="none" subsuperscript="superscript" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">1</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="4166" t="6019" r="4781" b="6221">(OOV)</wd>

<space/>

<wd l="4853" t="6019" r="5386" b="6178">words</wd>

<space/>

<wd l="5458" t="6019" r="5626" b="6173">in</wd>

<space/>

<wd l="5693" t="6067" r="5789" b="6178">a</wd>

<space/>

</run>

</ln>

<ln l="1426" t="6269" r="5774" b="6470" baseLine="6422" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="6317" r="2002" b="6470">corpus</wd>

<space/>

<wd l="2059" t="6269" r="2261" b="6427">of</wd>

<space/>

<wd l="2285" t="6269" r="2962" b="6470">product</wd>

<space/>

<wd l="3010" t="6269" r="3691" b="6427">reviews</wd>

<space/>

<wd l="3749" t="6269" r="4694" b="6470">(Hartmann</wd>

<space/>

<wd l="4752" t="6288" r="4906" b="6427">et</wd>

<space/>

<wd l="4958" t="6269" r="5155" b="6427">al.</wd>

<space/>

<wd l="5222" t="6269" r="5774" b="6470">2014).</wd>

<space/>

</ln>

<ln l="1426" t="6523" r="5779" b="6725" baseLine="6677" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="6528" r="1598" b="6677">In</wd>

<space/>

<wd l="1728" t="6523" r="2390" b="6682">another</wd>

<space/>

<wd l="2510" t="6523" r="3542" b="6725">preliminary</wd>

<space/>

<wd l="3672" t="6523" r="4848" b="6725">investigation,</wd>

<space/>

<wd l="4982" t="6571" r="5237" b="6682">we</wd>

<space/>

<wd l="5366" t="6523" r="5779" b="6682">have</wd>

<space/>

</ln>

<ln l="1426" t="6778" r="5789" b="6979" baseLine="6926" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="6778" r="1939" b="6936">found</wd>

<space/>

<wd l="1992" t="6778" r="2438" b="6936">other</wd>

<space/>

<wd l="2496" t="6778" r="3250" b="6936">different</wd>

<space/>

<wd l="3298" t="6797" r="3758" b="6979">types</wd>

<space/>

<wd l="3821" t="6778" r="4022" b="6936">of</wd>

<space/>

<wd l="4056" t="6778" r="4954" b="6936">deviations</wd>

<space/>

<wd l="5016" t="6778" r="5333" b="6936">and</wd>

<space/>

<wd l="5381" t="6778" r="5789" b="6936">their</wd>

<space/>

</ln>

<ln l="1426" t="7027" r="5789" b="7229" baseLine="7181" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="7027" r="2016" b="7229">impact</wd>

<space/>

<wd l="2064" t="7075" r="2280" b="7186">on</wd>

<space/>

<wd l="2323" t="7075" r="2419" b="7186">a</wd>

<space/>

<wd l="2458" t="7027" r="3120" b="7229">tagging</wd>

<space/>

<wd l="3158" t="7027" r="3518" b="7186">task</wd>

<space/>

<wd l="3557" t="7027" r="4176" b="7229">(Duran</wd>

<space/>

<wd l="4229" t="7046" r="4382" b="7186">et</wd>

<space/>

<wd l="4426" t="7027" r="4690" b="7219">al.,</wd>

<space/>

<wd l="4742" t="7027" r="5294" b="7229">2014).</wd>

<space/>

<wd l="5357" t="7027" r="5789" b="7186">Such</wd>

<space/>

</ln>

<ln l="1426" t="7282" r="5784" b="7483" baseLine="7435" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="7282" r="2246" b="7483">diagnosis</wd>

<space/>

<wd l="2386" t="7282" r="2674" b="7440">has</wd>

<space/>

<wd l="2813" t="7282" r="3514" b="7440">resulted</wd>

<space/>

<wd l="3648" t="7282" r="3816" b="7435">in</wd>

<space/>

<wd l="3950" t="7282" r="4219" b="7440">the</wd>

<space/>

<wd l="4349" t="7282" r="5318" b="7483">procedures</wd>

<space/>

<wd l="5453" t="7282" r="5784" b="7440">that</wd>

<space/>

</ln>

<ln l="1426" t="7536" r="5770" b="7738" baseLine="7685" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="7536" r="2189" b="7738">integrate</wd>

<space/>

<wd l="2237" t="7536" r="2506" b="7694">the</wd>

<space/>

<wd l="2558" t="7536" r="3782" b="7694">normalization</wd>

<space/>

<wd l="3840" t="7555" r="4445" b="7738">system</wd>

<space/>

<wd l="4483" t="7536" r="5304" b="7738">proposed</wd>

<space/>

<wd l="5352" t="7536" r="5770" b="7694">here.</wd>

</ln>

</para>

<para l="1421" t="7790" r="5789" b="9970" alignment="justified" spaceBefore="3" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="1651" t="7790" r="5779" b="7992" baseLine="7939" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1651" t="7790" r="1987" b="7949">The</wd>

<space/>

<wd l="2093" t="7790" r="2986" b="7949">remainder</wd>

<space/>

<wd l="3091" t="7790" r="3293" b="7949">of</wd>

<space/>

<wd l="3370" t="7790" r="3686" b="7949">this</wd>

<space/>

<wd l="3787" t="7838" r="4282" b="7992">paper</wd>

<space/>

<wd l="4387" t="7790" r="4526" b="7949">is</wd>

<space/>

<wd l="4637" t="7790" r="5501" b="7992">organized</wd>

<space/>

<wd l="5602" t="7838" r="5779" b="7949">as</wd>

<space/>

</ln>

<ln l="1426" t="8040" r="5784" b="8242" baseLine="8194" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="8040" r="2122" b="8198">follows.</wd>

<space/>

<wd l="2194" t="8040" r="2846" b="8198">Section</wd>

<space/>

<wd l="2899" t="8040" r="3000" b="8194">2</wd>

<space/>

<wd l="3053" t="8059" r="3773" b="8242">presents</wd>

<space/>

<wd l="3830" t="8040" r="4435" b="8198">related</wd>

<space/>

<wd l="4478" t="8040" r="5059" b="8198">works.</wd>

<space/>

<wd l="5131" t="8040" r="5784" b="8198">Section</wd>

<space/>

</ln>

<ln l="1426" t="8294" r="5784" b="8496" baseLine="8443" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="8294" r="1517" b="8453">3</wd>

<space/>

<wd l="1666" t="8294" r="2472" b="8453">describes</wd>

<space/>

<wd l="2611" t="8294" r="2875" b="8453">the</wd>

<space/>

<wd l="3014" t="8294" r="4262" b="8453">characteristics</wd>

<space/>

<wd l="4402" t="8294" r="4603" b="8453">of</wd>

<space/>

<wd l="4709" t="8294" r="4978" b="8453">the</wd>

<space/>

<wd l="5107" t="8294" r="5784" b="8496">product</wd>

<space/>

</ln>

<ln l="1421" t="8549" r="5779" b="8750" baseLine="8698" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="8549" r="2016" b="8707">review</wd>

<space/>

<wd l="2141" t="8597" r="2717" b="8750">corpus</wd>

<space/>

<wd l="2842" t="8549" r="3163" b="8707">and</wd>

<space/>

<wd l="3274" t="8549" r="3542" b="8707">the</wd>

<space/>

<wd l="3658" t="8549" r="4474" b="8750">problems</wd>

<space/>

<wd l="4594" t="8549" r="4978" b="8750">they</wd>

<space/>

<wd l="5088" t="8597" r="5496" b="8750">pose</wd>

<space/>

<wd l="5611" t="8568" r="5779" b="8707">to</wd>

<space/>

</ln>

<ln l="1421" t="8798" r="5784" b="9000" baseLine="8952" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="8798" r="2683" b="8957">normalization.</wd>

<space/>

<wd l="2760" t="8798" r="3413" b="8957">Section</wd>

<space/>

<wd l="3466" t="8798" r="3571" b="8952">4</wd>

<space/>

<wd l="3634" t="8818" r="4238" b="9000">reports</wd>

<space/>

<wd l="4296" t="8798" r="4565" b="8957">the</wd>

<space/>

<wd l="4622" t="8798" r="5784" b="9000">methodology</wd>

<space/>

</ln>

<ln l="1421" t="9053" r="5770" b="9211" baseLine="9206" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="9053" r="1829" b="9211">used</wd>

<space/>

<wd l="1867" t="9072" r="2035" b="9211">to</wd>

<space/>

<wd l="2093" t="9072" r="2894" b="9211">construct</wd>

<space/>

<wd l="2938" t="9053" r="3206" b="9211">the</wd>

<space/>

<wd l="3254" t="9053" r="4478" b="9211">normalization</wd>

<space/>

<wd l="4522" t="9053" r="4910" b="9211">tool.</wd>

<space/>

<wd l="4978" t="9053" r="5626" b="9211">Section</wd>

<space/>

<wd l="5683" t="9058" r="5770" b="9211">5</wd>

<space/>

</ln>

<ln l="1426" t="9307" r="5789" b="9466" baseLine="9456" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="9307" r="2232" b="9466">describes</wd>

<space/>

<wd l="2424" t="9307" r="2741" b="9466">and</wd>

<space/>

<wd l="2923" t="9307" r="3734" b="9466">discusses</wd>

<space/>

<wd l="3917" t="9307" r="4186" b="9466">the</wd>

<space/>

<wd l="4373" t="9307" r="5285" b="9466">evaluation</wd>

<space/>

<wd l="5472" t="9307" r="5789" b="9466">and</wd>

<space/>

</ln>

<ln l="1421" t="9557" r="5779" b="9758" baseLine="9710" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="9557" r="2299" b="9715">validation</wd>

<space/>

<wd l="2362" t="9557" r="2981" b="9715">results.</wd>

<space/>

<wd l="3058" t="9557" r="3725" b="9758">Finally,</wd>

<space/>

<wd l="3802" t="9557" r="3970" b="9710">in</wd>

<space/>

<wd l="4042" t="9557" r="4694" b="9715">Section</wd>

<space/>

<wd l="4771" t="9557" r="4920" b="9749">6,</wd>

<space/>

<wd l="4992" t="9605" r="5246" b="9715">we</wd>

<space/>

<wd l="5309" t="9557" r="5779" b="9715">make</wd>

<space/>

</ln>

<ln l="1430" t="9811" r="5270" b="9970" baseLine="9965" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="9859" r="1882" b="9970">some</wd>

<space/>

<wd l="1939" t="9811" r="2338" b="9970">final</wd>

<space/>

<wd l="2395" t="9811" r="3101" b="9970">remarks</wd>

<space/>

<wd l="3163" t="9811" r="3485" b="9970">and</wd>

<space/>

<wd l="3538" t="9811" r="4142" b="9970">outline</wd>

<space/>

<wd l="4200" t="9811" r="4723" b="9970">future</wd>

<space/>

<wd l="4781" t="9811" r="5270" b="9970">work.</wd>

</ln>

</para>

<para l="1426" t="10277" r="3245" b="10445" alignment="left" spaceBefore="201" lsp="exactly" lspExact="273" language="en">

<ln l="1426" t="10277" r="3245" b="10445" baseLine="10435" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="6">

<wd l="1426" t="10277" r="1594" b="10445">2.</wd>

<space/>

<wd l="1781" t="10277" r="2558" b="10445">Related</wd>

<space/>

<wd l="2621" t="10277" r="3245" b="10445">works</wd>

</ln>

</para>

<para l="1416" t="10699" r="5784" b="13138" alignment="justified" spaceBefore="159" lsp="exactly" lspExact="253" language="en">

<ln l="1426" t="10699" r="5779" b="10901" baseLine="10853" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="10704" r="1824" b="10858">Text</wd>

<space/>

<wd l="1901" t="10699" r="3120" b="10858">normalization</wd>

<space/>

<wd l="3202" t="10699" r="3341" b="10858">is</wd>

<space/>

<wd l="3427" t="10747" r="3523" b="10858">a</wd>

<space/>

<wd l="3595" t="10718" r="3998" b="10858">term</wd>

<space/>

<wd l="4070" t="10699" r="4483" b="10858">used</wd>

<space/>

<wd l="4555" t="10718" r="4723" b="10858">to</wd>

<space/>

<wd l="4805" t="10747" r="5438" b="10901">convey</wd>

<space/>

<wd l="5510" t="10699" r="5779" b="10858">the</wd>

<space/>

</ln>

<ln l="1426" t="10954" r="5779" b="11155" baseLine="11107" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="10954" r="1790" b="11112">idea</wd>

<space/>

<wd l="1843" t="10954" r="2045" b="11112">of</wd>

<space/>

<wd l="2083" t="10954" r="3019" b="11155">converting</wd>

<space/>

<wd l="3072" t="10954" r="3341" b="11112">the</wd>

<space/>

<wd l="3398" t="10954" r="3979" b="11112">format</wd>

<space/>

<wd l="4042" t="10954" r="4238" b="11112">of</wd>

<space/>

<wd l="4277" t="11002" r="4373" b="11112">a</wd>

<space/>

<wd l="4421" t="10973" r="4752" b="11112">text</wd>

<space/>

<wd l="4805" t="10973" r="4973" b="11112">to</wd>

<space/>

<wd l="5035" t="10973" r="5462" b="11112">meet</wd>

<space/>

<wd l="5515" t="10954" r="5779" b="11112">the</wd>

<space/>

</ln>

<ln l="1421" t="11208" r="5779" b="11410" baseLine="11357" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="11208" r="2563" b="11410">requirements</wd>

<space/>

<wd l="2635" t="11208" r="2832" b="11366">of</wd>

<space/>

<wd l="2875" t="11256" r="2971" b="11366">a</wd>

<space/>

<wd l="3034" t="11208" r="3518" b="11410">given</wd>

<space/>

<wd l="3571" t="11256" r="4320" b="11410">purpose.</wd>

<space/>

<wd l="4392" t="11208" r="4901" b="11366">There</wd>

<space/>

<wd l="4968" t="11256" r="5232" b="11366">are</wd>

<space/>

<wd l="5294" t="11256" r="5779" b="11410">many</wd>

<space/>

</ln>

<ln l="1421" t="11458" r="5774" b="11659" baseLine="11611" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="11477" r="1747" b="11616">text</wd>

<space/>

<wd l="1814" t="11458" r="3034" b="11616">normalization</wd>

<space/>

<wd l="3091" t="11506" r="3936" b="11659">processes</wd>

<space/>

<wd l="4003" t="11458" r="4742" b="11659">reported</wd>

<space/>

<wd l="4805" t="11458" r="4973" b="11611">in</wd>

<space/>

<wd l="5030" t="11458" r="5299" b="11616">the</wd>

<space/>

<wd l="5362" t="11462" r="5774" b="11616">NLP</wd>

<space/>

</ln>

<ln l="1426" t="11712" r="5784" b="11914" baseLine="11866" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="11712" r="2213" b="11870">literature</wd>

<space/>

<wd l="2266" t="11712" r="2582" b="11870">and</wd>

<space/>

<wd l="2621" t="11712" r="3000" b="11914">they</wd>

<space/>

<wd l="3043" t="11760" r="3437" b="11914">vary</wd>

<space/>

<wd l="3485" t="11712" r="3696" b="11870">in:</wd>

<space/>

<wd l="3763" t="11712" r="3888" b="11914">i)</wd>

<space/>

<wd l="3941" t="11712" r="4210" b="11870">the</wd>

<space/>

<wd l="4258" t="11760" r="4742" b="11914">genre</wd>

<space/>

<wd l="4795" t="11712" r="4992" b="11870">of</wd>

<space/>

<wd l="5016" t="11712" r="5285" b="11870">the</wd>

<space/>

<wd l="5338" t="11712" r="5784" b="11914">input</wd>

<space/>

</ln>

<ln l="1421" t="11966" r="5784" b="12168" baseLine="12115" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="11986" r="1795" b="12158">text;</wd>

<space/>

<wd l="1867" t="11966" r="2054" b="12168">ii)</wd>

<space/>

<wd l="2117" t="11966" r="2386" b="12125">the</wd>

<space/>

<wd l="2443" t="11966" r="3082" b="12125">desired</wd>

<space/>

<wd l="3134" t="11986" r="3696" b="12168">output</wd>

<space/>

<wd l="3754" t="11966" r="4382" b="12158">format;</wd>

<space/>

<wd l="4454" t="11966" r="4699" b="12168">iii)</wd>

<space/>

<wd l="4762" t="11966" r="5030" b="12125">the</wd>

<space/>

<wd l="5083" t="12014" r="5784" b="12168">purpose</wd>

<space/>

</ln>

<ln l="1426" t="12221" r="5779" b="12422" baseLine="12370" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="12221" r="1627" b="12379">of</wd>

<space/>

<wd l="1675" t="12221" r="1939" b="12379">the</wd>

<space/>

<wd l="2016" t="12221" r="3283" b="12413">normalization,</wd>

<space/>

<wd l="3365" t="12221" r="3686" b="12379">and</wd>

<space/>

<wd l="3754" t="12221" r="3984" b="12422">iv)</wd>

<space/>

<wd l="4066" t="12221" r="4334" b="12379">the</wd>

<space/>

<wd l="4406" t="12221" r="5069" b="12379">method</wd>

<space/>

<wd l="5136" t="12221" r="5544" b="12379">used</wd>

<space/>

<wd l="5611" t="12240" r="5779" b="12379">to</wd>

<space/>

</ln>

<ln l="1416" t="12470" r="5779" b="12672" baseLine="12624" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1416" t="12470" r="2131" b="12672">perform</wd>

<space/>

<wd l="2246" t="12470" r="2515" b="12629">the</wd>

<space/>

<wd l="2635" t="12470" r="3034" b="12629">task.</wd>

<space/>

<wd l="3173" t="12475" r="3298" b="12629">It</wd>

<space/>

<wd l="3422" t="12470" r="3562" b="12629">is</wd>

<space/>

<wd l="3691" t="12470" r="4541" b="12672">important</wd>

<space/>

<wd l="4661" t="12490" r="4829" b="12629">to</wd>

<space/>

<wd l="4954" t="12470" r="5318" b="12629">take</wd>

<space/>

<wd l="5443" t="12470" r="5779" b="12629">into</wd>

<space/>

</ln>

<ln l="1426" t="12725" r="5784" b="12926" baseLine="12878" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="12744" r="2102" b="12883">account</wd>

<space/>

<wd l="2160" t="12725" r="2554" b="12883">such</wd>

<space/>

<wd l="2602" t="12725" r="3850" b="12883">characteristics</wd>

<space/>

<wd l="3898" t="12744" r="4066" b="12883">to</wd>

<space/>

<wd l="4123" t="12725" r="4718" b="12926">clearly</wd>

<space/>

<wd l="4766" t="12725" r="5309" b="12883">define</wd>

<space/>

<wd l="5357" t="12725" r="5784" b="12883">what</wd>

<space/>

</ln>

<ln l="1426" t="12979" r="5270" b="13138" baseLine="13128" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="12979" r="1848" b="13138">“text</wd>

<space/>

<wd l="1906" t="12979" r="3221" b="13138">normalization”</wd>

<space/>

<wd l="3278" t="13027" r="3835" b="13138">means</wd>

<space/>

<wd l="3898" t="12979" r="4066" b="13133">in</wd>

<space/>

<wd l="4123" t="12979" r="4522" b="13138">each</wd>

<space/>

<wd l="4584" t="12998" r="5270" b="13138">context.</wd>

</ln>

</para>

<para l="1426" t="13229" r="5784" b="13685" alignment="justified" spaceBefore="3" spaceAfter="287" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="1651" t="13229" r="5774" b="13430" baseLine="13382" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1651" t="13229" r="1987" b="13387">The</wd>

<space/>

<wd l="2050" t="13229" r="2501" b="13430">input</wd>

<space/>

<wd l="2554" t="13248" r="2885" b="13387">text</wd>

<space/>

<wd l="2942" t="13277" r="3317" b="13430">may</wd>

<space/>

<wd l="3379" t="13277" r="3562" b="13387">or</wd>

<space/>

<wd l="3614" t="13277" r="3994" b="13430">may</wd>

<space/>

<wd l="4051" t="13248" r="4334" b="13387">not</wd>

<space/>

<wd l="4387" t="13229" r="4594" b="13387">be</wd>

<space/>

<wd l="4656" t="13229" r="5774" b="13387">well-written.</wd>

<space/>

</ln>

<ln l="1426" t="13483" r="5784" b="13685" baseLine="13637" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1426" t="13483" r="1762" b="13642">The</wd>

<space/>

<wd l="1805" t="13483" r="2170" b="13642">task</wd>

<space/>

<wd l="2213" t="13483" r="2414" b="13642">of</wd>

<space/>

<wd l="2438" t="13483" r="3499" b="13685">normalizing</wd>

<space/>

<wd l="3542" t="13502" r="3878" b="13642">text</wd>

<space/>

<wd l="3926" t="13483" r="4354" b="13642">from</wd>

<space/>

<wd l="4402" t="13531" r="4498" b="13642">a</wd>

<space/>

<wd l="4541" t="13531" r="5486" b="13685">newspaper</wd>

<space/>

<wd l="5534" t="13483" r="5784" b="13685">(as</wd>

</ln>

</para>

<rulerline l="1416" t="14002" r="4301" b="14002" type="single" width="19" color="000000"/>

<para l="1421" t="14126" r="5789" b="15422" alignment="justified" spaceBefore="191" lsp="exactly" lspExact="206" language="en">

<ln l="1435" t="14126" r="5789" b="14371" baseLine="14319">

<run underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1435" t="14126" r="1483" b="14237">1</wd>

<space/>

</run>

<wd l="1565" t="14198" r="2976" b="14371"><run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">“Out</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">-of-vocabulary</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="3019" t="14198" r="3523" b="14366">(OOV)</wd>

<space/>

<wd l="3571" t="14198" r="4008" b="14333">words</wd>

<space/>

<wd l="4061" t="14242" r="4272" b="14333">are</wd>

<space/>

<wd l="4320" t="14198" r="4992" b="14333">unknown</wd>

<space/>

<wd l="5030" t="14198" r="5467" b="14333">words</wd>

<space/>

<wd l="5515" t="14198" r="5789" b="14333">that</wd>

<space/>

</run>

</ln>

<ln l="1426" t="14419" r="5779" b="14592" baseLine="14549" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="14462" r="1901" b="14587">appear</wd>

<space/>

<wd l="2006" t="14419" r="2141" b="14549">in</wd>

<space/>

<wd l="2242" t="14419" r="2458" b="14554">the</wd>

<space/>

<wd l="2563" t="14419" r="3048" b="14592">testing</wd>

<space/>

<wd l="3149" t="14419" r="3634" b="14587">speech</wd>

<space/>

<wd l="3730" t="14419" r="3970" b="14554">but</wd>

<space/>

<wd l="4066" t="14438" r="4301" b="14554">not</wd>

<space/>

<wd l="4406" t="14419" r="4541" b="14549">in</wd>

<space/>

<wd l="4642" t="14419" r="4858" b="14554">the</wd>

<space/>

<wd l="4963" t="14419" r="5779" b="14592">recognition</wd>

<space/>

</ln>

<ln l="1421" t="14626" r="5779" b="14798" baseLine="14755" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="14626" r="2246" b="14794">vocabulary.</wd>

<space/>

<wd l="2323" t="14626" r="2693" b="14798">They</wd>

<space/>

<wd l="2750" t="14669" r="2966" b="14760">are</wd>

<space/>

<wd l="3029" t="14626" r="3552" b="14798">usually</wd>

<space/>

<wd l="3610" t="14626" r="4306" b="14794">important</wd>

<space/>

<wd l="4368" t="14645" r="4896" b="14760">content</wd>

<space/>

<wd l="4954" t="14626" r="5390" b="14760">words</wd>

<space/>

<wd l="5458" t="14626" r="5779" b="14760">such</wd>

<space/>

</ln>

<ln l="1426" t="14837" r="5779" b="15000" baseLine="14962" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="14880" r="1565" b="14971">as</wd>

<space/>

<wd l="1613" t="14880" r="2064" b="14971">names</wd>

<space/>

<wd l="2117" t="14837" r="2376" b="14971">and</wd>

<space/>

<wd l="2424" t="14837" r="3110" b="15000">locations,</wd>

<space/>

<wd l="3158" t="14837" r="3600" b="14971">which</wd>

<space/>

<wd l="3648" t="14837" r="4176" b="14971">contain</wd>

<space/>

<wd l="4224" t="14837" r="5069" b="14971">information</wd>

<space/>

<wd l="5117" t="14837" r="5597" b="14971">crucial</wd>

<space/>

<wd l="5645" t="14856" r="5779" b="14971">to</wd>

<space/>

</ln>

<ln l="1421" t="15043" r="5784" b="15216" baseLine="15168" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="15043" r="1637" b="15178">the</wd>

<space/>

<wd l="1680" t="15086" r="2213" b="15178">success</wd>

<space/>

<wd l="2251" t="15048" r="2419" b="15178">of</wd>

<space/>

<wd l="2434" t="15086" r="2837" b="15216">many</wd>

<space/>

<wd l="2875" t="15043" r="3360" b="15211">speech</wd>

<space/>

<wd l="3394" t="15043" r="4219" b="15216">recognition</wd>

<space/>

<wd l="4253" t="15043" r="4646" b="15178">tasks.</wd>

<space/>

<wd l="4694" t="15053" r="5390" b="15206">However,</wd>

<space/>

<wd l="5434" t="15062" r="5784" b="15178">most</wd>

<space/>

</ln>

<ln l="1426" t="15250" r="5789" b="15422" baseLine="15374" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="15250" r="1906" b="15418">speech</wd>

<space/>

<wd l="2146" t="15250" r="2966" b="15422">recognition</wd>

<space/>

<wd l="3206" t="15269" r="3763" b="15422">systems</wd>

<space/>

<wd l="4013" t="15293" r="4224" b="15384">are</wd>

<space/>

<wd l="4469" t="15250" r="5789" b="15418">closed-vocabulary</wd>

</ln>

</para>

</column>

<column l="6120" t="1417" r="10517" b="14946">

<para l="6125" t="1464" r="10493" b="5712" alignment="justified" spaceBefore="4" lsp="exactly" lspExact="252" language="en">

<ln l="6130" t="1464" r="10493" b="1666" baseLine="1613" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="1464" r="6298" b="1618">in</wd>

<space/>

<wd l="6394" t="1464" r="7152" b="1666">Schlippe</wd>

<space/>

<wd l="7248" t="1483" r="7402" b="1622">et</wd>

<space/>

<wd l="7493" t="1464" r="7752" b="1656">al.,</wd>

<space/>

<wd l="7853" t="1464" r="8357" b="1666">2012)</wd>

<space/>

<wd l="8458" t="1464" r="8592" b="1622">is</wd>

<space/>

<wd l="8693" t="1464" r="9125" b="1666">quite</wd>

<space/>

<wd l="9221" t="1464" r="9974" b="1622">different</wd>

<space/>

<wd l="10066" t="1464" r="10493" b="1622">from</wd>

<space/>

</ln>

<ln l="6125" t="1718" r="10488" b="1920" baseLine="1867" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6125" t="1718" r="7186" b="1920">normalizing</wd>

<space/>

<wd l="7277" t="1738" r="7690" b="1877">texts</wd>

<space/>

<wd l="7781" t="1718" r="8606" b="1920">produced</wd>

<space/>

<wd l="8693" t="1718" r="8918" b="1920">by</wd>

<space/>

<wd l="9010" t="1718" r="10488" b="1920">non-professional</wd>

<space/>

</ln>

<ln l="6130" t="1968" r="10483" b="2160" baseLine="2122" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="1968" r="6797" b="2126">internet</wd>

<space/>

<wd l="6989" t="2016" r="7493" b="2160">users,</wd>

<space/>

<wd l="7699" t="1968" r="7954" b="2126">i.e.</wd>

<space/>

<wd l="8160" t="1968" r="8664" b="2126">UGC.</wd>

<space/>

<wd l="8875" t="1973" r="9048" b="2122">In</wd>

<space/>

<wd l="9250" t="1968" r="10018" b="2160">addition,</wd>

<space/>

<wd l="10219" t="1968" r="10483" b="2126">the</wd>

<space/>

</ln>

<ln l="6125" t="2222" r="10483" b="2424" baseLine="2376" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6125" t="2222" r="7344" b="2381">normalization</wd>

<space/>

<wd l="7421" t="2222" r="7618" b="2381">of</wd>

<space/>

<wd l="7666" t="2222" r="8126" b="2381">UGC</wd>

<space/>

<wd l="8198" t="2270" r="8578" b="2424">may</wd>

<space/>

<wd l="8650" t="2222" r="9288" b="2424">depend</wd>

<space/>

<wd l="9355" t="2270" r="9571" b="2381">on</wd>

<space/>

<wd l="9638" t="2222" r="9902" b="2381">the</wd>

<space/>

<wd l="9984" t="2222" r="10483" b="2381">social</wd>

<space/>

</ln>

<ln l="6125" t="2477" r="10488" b="2678" baseLine="2626" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6125" t="2477" r="6662" b="2635">media</wd>

<space/>

<wd l="6763" t="2477" r="7210" b="2635">used.</wd>

<space/>

<wd l="7325" t="2482" r="7637" b="2635">For</wd>

<space/>

<wd l="7738" t="2477" r="8525" b="2678">example,</wd>

<space/>

<wd l="8635" t="2477" r="9077" b="2635">there</wd>

<space/>

<wd l="9182" t="2525" r="9446" b="2635">are</wd>

<space/>

<wd l="9562" t="2477" r="10488" b="2635">substantial</wd>

<space/>

</ln>

<ln l="6130" t="2726" r="10483" b="2928" baseLine="2880" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="2726" r="7094" b="2885">differences</wd>

<space/>

<wd l="7205" t="2726" r="7944" b="2885">between</wd>

<space/>

<wd l="8059" t="2726" r="8491" b="2885">short</wd>

<space/>

<wd l="8597" t="2774" r="9341" b="2928">message</wd>

<space/>

<wd l="9451" t="2746" r="9864" b="2885">texts</wd>

<space/>

<wd l="9979" t="2726" r="10483" b="2928">(SMS</wd>

<space/>

</ln>

<ln l="6130" t="2981" r="10478" b="3182" baseLine="3134">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6130" t="2981" r="6446" b="3139">and</wd>

<space/>

<wd l="6490" t="2981" r="7598" b="3182">microblogs),</wd>

<space/>

<wd l="7656" t="2981" r="8275" b="3139">on-line</wd>

<space/>

<wd l="8323" t="2981" r="8770" b="3139">chats</wd>

<space/>

<wd l="8822" t="2981" r="9139" b="3139">and</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="9182" t="2981" r="9691" b="3139">users’</wd>

<space/>

</run>

<wd l="9749" t="2981" r="10478" b="3139"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">reviews</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

</ln>

<ln l="6134" t="3235" r="10488" b="3437" baseLine="3384" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6134" t="3235" r="6605" b="3394">Short</wd>

<space/>

<wd l="6720" t="3283" r="7546" b="3437">messages</wd>

<space/>

<wd l="7670" t="3235" r="7992" b="3394">and</wd>

<space/>

<wd l="8107" t="3235" r="8554" b="3394">chats</wd>

<space/>

<wd l="8678" t="3235" r="9307" b="3394">deviate</wd>

<space/>

<wd l="9432" t="3235" r="9917" b="3394">much</wd>

<space/>

<wd l="10037" t="3283" r="10488" b="3394">more</wd>

<space/>

</ln>

<ln l="6130" t="3490" r="10483" b="3691" baseLine="3638">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6130" t="3490" r="6557" b="3648">from</wd>

<space/>

<wd l="6643" t="3490" r="6912" b="3648">the</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="7013" t="3490" r="7752" b="3648">standard</wd>

<space/>

<wd l="7843" t="3490" r="8626" b="3691">language</wd>

<space/>

<wd l="8722" t="3490" r="9101" b="3648">than</wd>

<space/>

<wd l="9187" t="3490" r="9701" b="3648">users’</wd>

<space/>

<wd l="9806" t="3490" r="10483" b="3648">reviews</wd>

<space/>

</run>

</ln>

<ln l="6130" t="3739" r="10488" b="3941" baseLine="3893">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6130" t="3739" r="6446" b="3898">and</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6504" t="3787" r="6768" b="3898">are</wd>

<space/>

<wd l="6830" t="3739" r="7766" b="3941">commonly</wd>

<space/>

<wd l="7819" t="3739" r="8597" b="3941">regarded</wd>

<space/>

<wd l="8654" t="3787" r="8827" b="3898">as</wd>

<space/>

<wd l="8894" t="3739" r="9466" b="3941">“noisy</wd>

<space/>

<wd l="9518" t="3739" r="10080" b="3898">texts”.</wd>

<space/>

<wd l="10152" t="3739" r="10488" b="3898">The</wd>

<space/>

</run>

</ln>

<ln l="6125" t="3994" r="10488" b="4195" baseLine="4142" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6125" t="3994" r="7344" b="4152">normalization</wd>

<space/>

<wd l="7416" t="4042" r="8261" b="4195">processes</wd>

<space/>

<wd l="8347" t="3994" r="8549" b="4152">of</wd>

<space/>

<wd l="8611" t="3994" r="9043" b="4152">short</wd>

<space/>

<wd l="9120" t="4042" r="9998" b="4195">messages,</wd>

<space/>

<wd l="10094" t="3994" r="10488" b="4152">such</wd>

<space/>

</ln>

<ln l="6130" t="4248" r="10478" b="4450" baseLine="4397" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="4296" r="6302" b="4406">as</wd>

<space/>

<wd l="6389" t="4248" r="6811" b="4406">SMS</wd>

<space/>

<wd l="6898" t="4248" r="7214" b="4406">and</wd>

<space/>

<wd l="7286" t="4248" r="7934" b="4406">Twitter</wd>

<space/>

<wd l="8002" t="4296" r="8827" b="4450">messages</wd>

<space/>

<wd l="8909" t="4248" r="9922" b="4450">(Contractor</wd>

<space/>

<wd l="9994" t="4267" r="10190" b="4406">et.</wd>

<space/>

<wd l="10282" t="4248" r="10478" b="4406">al.</wd>

<space/>

</ln>

<ln l="6130" t="4498" r="10478" b="4690" baseLine="4651" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="4498" r="6614" b="4690">2010;</wd>

<space/>

<wd l="6672" t="4498" r="6979" b="4656">Liu</wd>

<space/>

<wd l="7032" t="4517" r="7186" b="4656">et</wd>

<space/>

<wd l="7234" t="4498" r="7435" b="4656">al.</wd>

<space/>

<wd l="7493" t="4498" r="7978" b="4690">2011;</wd>

<space/>

<wd l="8035" t="4502" r="8405" b="4656">Han</wd>

<space/>

<wd l="8453" t="4517" r="8606" b="4656">et</wd>

<space/>

<wd l="8659" t="4498" r="8918" b="4690">al.,</wd>

<space/>

<wd l="8976" t="4498" r="9456" b="4690">2013;</wd>

<space/>

<wd l="9518" t="4498" r="9931" b="4690">Bali,</wd>

<space/>

<wd l="9994" t="4498" r="10478" b="4690">2013;</wd>

<space/>

</ln>

<ln l="6130" t="4752" r="10488" b="4954" baseLine="4906">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6130" t="4752" r="6979" b="4954">Chrupała,</wd>

<space/>

</run>

<wd l="7075" t="4752" r="7579" b="4954"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">2014</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">)</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="7675" t="4752" r="7992" b="4910">and</wd>

<space/>

<wd l="8074" t="4752" r="8635" b="4954">longer</wd>

<space/>

<wd l="8712" t="4752" r="9173" b="4910">UGC</wd>

<space/>

<wd l="9259" t="4771" r="9725" b="4944">texts,</wd>

<space/>

<wd l="9830" t="4752" r="10224" b="4910">such</wd>

<space/>

<wd l="10310" t="4800" r="10488" b="4910">as</wd>

<space/>

</run>

</ln>

<ln l="6125" t="5006" r="10488" b="5208" baseLine="5155" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6125" t="5006" r="6806" b="5165">reviews</wd>

<space/>

<wd l="6864" t="5006" r="7181" b="5165">and</wd>

<space/>

<wd l="7219" t="5006" r="7742" b="5208">blogs,</wd>

<space/>

<wd l="7800" t="5006" r="8213" b="5165">have</wd>

<space/>

<wd l="8266" t="5006" r="8750" b="5165">much</wd>

<space/>

<wd l="8808" t="5006" r="8976" b="5160">in</wd>

<space/>

<wd l="9024" t="5054" r="9835" b="5198">common,</wd>

<space/>

<wd l="9888" t="5006" r="10176" b="5165">but</wd>

<space/>

<wd l="10219" t="5006" r="10488" b="5165">the</wd>

<space/>

</ln>

<ln l="6130" t="5261" r="10488" b="5462" baseLine="5410" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="5261" r="7094" b="5419">differences</wd>

<space/>

<wd l="7195" t="5309" r="7459" b="5419">are</wd>

<space/>

<wd l="7565" t="5261" r="8558" b="5462">sufficiently</wd>

<space/>

<wd l="8654" t="5261" r="9576" b="5462">significant</wd>

<space/>

<wd l="9662" t="5280" r="9830" b="5419">to</wd>

<space/>

<wd l="9902" t="5261" r="10488" b="5462">justify</wd>

<space/>

</ln>

<ln l="6130" t="5510" r="8549" b="5712" baseLine="5664" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="5510" r="7066" b="5712">addressing</wd>

<space/>

<wd l="7118" t="5510" r="7560" b="5669">them</wd>

<space/>

<wd l="7622" t="5510" r="8549" b="5712">separately.</wd>

</ln>

</para>

<para l="6120" t="5765" r="10512" b="9758" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="6350" t="5765" r="10488" b="5966" baseLine="5914" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="5765" r="7162" b="5923">Different</wd>

<space/>

<wd l="7238" t="5765" r="8462" b="5923">normalization</wd>

<space/>

<wd l="8539" t="5813" r="9322" b="5966">purposes</wd>

<space/>

<wd l="9408" t="5813" r="9782" b="5966">may</wd>

<space/>

<wd l="9864" t="5765" r="10488" b="5966">require</wd>

<space/>

</ln>

<ln l="6125" t="6019" r="10488" b="6221" baseLine="6168" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="6019" r="6394" b="6178">the</wd>

<space/>

<wd l="6499" t="6067" r="6792" b="6178">use</wd>

<space/>

<wd l="6902" t="6019" r="7104" b="6178">of</wd>

<space/>

<wd l="7195" t="6019" r="8299" b="6221">substantially</wd>

<space/>

<wd l="8410" t="6019" r="9163" b="6178">different</wd>

<space/>

<wd l="9264" t="6019" r="10488" b="6178">normalization</wd>

<space/>

</ln>

<ln l="6120" t="6269" r="10483" b="6470" baseLine="6422" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="6269" r="7085" b="6470">procedures</wd>

<space/>

<wd l="7267" t="6274" r="7574" b="6427">For</wd>

<space/>

<wd l="7752" t="6269" r="8539" b="6470">example,</wd>

<space/>

<wd l="8726" t="6269" r="9667" b="6470">converting</wd>

<space/>

<wd l="9840" t="6288" r="10483" b="6427">text-to-</wd>

</ln>

<ln l="6134" t="6523" r="10488" b="6725" baseLine="6677" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="6523" r="6725" b="6725">speech</wd>

<space/>

<wd l="6816" t="6523" r="7517" b="6725">requires</wd>

<space/>

<wd l="7608" t="6523" r="7877" b="6682">the</wd>

<space/>

<wd l="7973" t="6523" r="8861" b="6725">expansion</wd>

<space/>

<wd l="8957" t="6523" r="9158" b="6682">of</wd>

<space/>

<wd l="9230" t="6571" r="10070" b="6725">acronyms</wd>

<space/>

<wd l="10171" t="6523" r="10488" b="6682">and</wd>

<space/>

</ln>

<ln l="6130" t="6778" r="10512" b="6970" baseLine="6926" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="6778" r="7358" b="6970">abbreviations,</wd>

<space/>

<wd l="7531" t="6826" r="7704" b="6936">as</wd>

<space/>

<wd l="7867" t="6778" r="8242" b="6936">well</wd>

<space/>

<wd l="8410" t="6826" r="8582" b="6936">as</wd>

<space/>

<wd l="8750" t="6778" r="9014" b="6936">the</wd>

<space/>

<wd l="9182" t="6778" r="10147" b="6936">conversion</wd>

<space/>

<wd l="10310" t="6778" r="10512" b="6936">of</wd>

<space/>

</ln>

<ln l="6125" t="7027" r="10488" b="7229" baseLine="7181" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="7027" r="6840" b="7186">numeric</wd>

<space/>

<wd l="6926" t="7075" r="7109" b="7186">or</wd>

<space/>

<wd l="7176" t="7027" r="8357" b="7186">mathematical</wd>

<space/>

<wd l="8438" t="7027" r="9456" b="7229">expressions</wd>

<space/>

<wd l="9538" t="7027" r="9874" b="7186">into</wd>

<space/>

<wd l="9955" t="7027" r="10488" b="7186">words</wd>

<space/>

</ln>

<ln l="6130" t="7282" r="10474" b="7483" baseLine="7435" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="7282" r="6720" b="7483">(Boros</wd>

<space/>

<wd l="6888" t="7301" r="7042" b="7440">et</wd>

<space/>

<wd l="7210" t="7282" r="7464" b="7474">al.,</wd>

<space/>

<wd l="7637" t="7282" r="8122" b="7474">2012,</wd>

<space/>

<wd l="8299" t="7282" r="9058" b="7483">Schlippe</wd>

<space/>

<wd l="9226" t="7301" r="9379" b="7440">et</wd>

<space/>

<wd l="9542" t="7282" r="9739" b="7440">al.</wd>

<space/>

<wd l="9917" t="7282" r="10474" b="7483">2012);</wd>

<space/>

</ln>

<ln l="6130" t="7536" r="10488" b="7738" baseLine="7685" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="7536" r="7123" b="7738">conversely,</wd>

<space/>

<wd l="7210" t="7536" r="8429" b="7694">normalization</wd>

<space/>

<wd l="8510" t="7536" r="8770" b="7694">for</wd>

<space/>

<wd l="8837" t="7584" r="9538" b="7738">purpose</wd>

<space/>

<wd l="9619" t="7536" r="9821" b="7694">of</wd>

<space/>

<wd l="9883" t="7536" r="10488" b="7738">storing</wd>

<space/>

</ln>

<ln l="6130" t="7790" r="10478" b="7992" baseLine="7939" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="7790" r="6494" b="7949">data</wd>

<space/>

<wd l="6586" t="7838" r="6960" b="7992">may</wd>

<space/>

<wd l="7051" t="7790" r="7766" b="7992">perform</wd>

<space/>

<wd l="7858" t="7790" r="8126" b="7949">the</wd>

<space/>

<wd l="8222" t="7790" r="9058" b="7949">reduction</wd>

<space/>

<wd l="9154" t="7790" r="9355" b="7949">of</wd>

<space/>

<wd l="9427" t="7790" r="9888" b="7949">word</wd>

<space/>

<wd l="9979" t="7790" r="10478" b="7949">forms</wd>

<space/>

</ln>

<ln l="6130" t="8040" r="10488" b="8242" baseLine="8194" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="8040" r="6466" b="8198">into</wd>

<space/>

<wd l="6523" t="8040" r="6926" b="8198">their</wd>

<space/>

<wd l="6989" t="8059" r="7517" b="8198">stems.</wd>

<space/>

<wd l="7584" t="8045" r="8035" b="8198">Even</wd>

<space/>

<wd l="8098" t="8088" r="8194" b="8198">a</wd>

<space/>

<wd l="8246" t="8040" r="8818" b="8242">“noisy</wd>

<space/>

<wd l="8870" t="8040" r="9298" b="8198">text”</wd>

<space/>

<wd l="9360" t="8040" r="9557" b="8198">of</wd>

<space/>

<wd l="9590" t="8040" r="10051" b="8198">UGC</wd>

<space/>

<wd l="10109" t="8088" r="10488" b="8242">may</wd>

<space/>

</ln>

<ln l="6120" t="8294" r="10493" b="8496" baseLine="8443" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="8294" r="6331" b="8453">be</wd>

<space/>

<wd l="6533" t="8294" r="7526" b="8453">normalized</wd>

<space/>

<wd l="7728" t="8294" r="7982" b="8453">for</wd>

<space/>

<wd l="8189" t="8294" r="8942" b="8453">different</wd>

<space/>

<wd l="9139" t="8342" r="9970" b="8496">purposes.</wd>

<space/>

<wd l="10181" t="8299" r="10493" b="8453">For</wd>

<space/>

</ln>

<ln l="6130" t="8549" r="10488" b="8750" baseLine="8698" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="8549" r="6917" b="8750">example,</wd>

<space/>

<wd l="7080" t="8549" r="7570" b="8707">while</wd>

<space/>

<wd l="7728" t="8554" r="8611" b="8750">Mosquera</wd>

<space/>

<wd l="8770" t="8568" r="8923" b="8707">et</wd>

<space/>

<wd l="9086" t="8549" r="9283" b="8707">al.</wd>

<space/>

<wd l="9456" t="8549" r="10032" b="8750">(2012)</wd>

<space/>

<wd l="10195" t="8597" r="10488" b="8707">use</wd>

<space/>

</ln>

<ln l="6125" t="8798" r="10488" b="9000" baseLine="8952" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="8798" r="7344" b="8957">normalization</wd>

<space/>

<wd l="7402" t="8818" r="7570" b="8957">to</wd>

<space/>

<wd l="7632" t="8798" r="8357" b="9000">improve</wd>

<space/>

<wd l="8414" t="8798" r="8683" b="8957">the</wd>

<space/>

<wd l="8746" t="8798" r="9830" b="9000">accessibility</wd>

<space/>

<wd l="9888" t="8798" r="10090" b="8957">of</wd>

<space/>

<wd l="10123" t="8798" r="10488" b="8957">web</wd>

<space/>

</ln>

<ln l="6130" t="9053" r="10474" b="9254" baseLine="9206" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="9072" r="6821" b="9245">content,</wd>

<space/>

<wd l="6922" t="9053" r="7238" b="9211">Aw</wd>

<space/>

<wd l="7339" t="9072" r="7493" b="9211">et</wd>

<space/>

<wd l="7589" t="9053" r="7786" b="9211">al.</wd>

<space/>

<wd l="7896" t="9053" r="8472" b="9254">(2006)</wd>

<space/>

<wd l="8578" t="9053" r="8894" b="9211">and</wd>

<space/>

<wd l="8990" t="9053" r="9931" b="9211">Contractor</wd>

<space/>

<wd l="10027" t="9072" r="10181" b="9211">et</wd>

<space/>

<wd l="10277" t="9053" r="10474" b="9211">al.</wd>

<space/>

</ln>

<ln l="6130" t="9307" r="10488" b="9509" baseLine="9456" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="9307" r="6706" b="9509">(2010)</wd>

<space/>

<wd l="6782" t="9355" r="7051" b="9466">see</wd>

<space/>

<wd l="7114" t="9307" r="7382" b="9466">the</wd>

<space/>

<wd l="7445" t="9307" r="8664" b="9466">normalization</wd>

<space/>

<wd l="8731" t="9355" r="8909" b="9466">as</wd>

<space/>

<wd l="8976" t="9355" r="9072" b="9466">a</wd>

<space/>

<wd l="9130" t="9307" r="10166" b="9509">prerequisite</wd>

<space/>

<wd l="10234" t="9307" r="10488" b="9466">for</wd>

<space/>

</ln>

<ln l="6130" t="9557" r="9034" b="9758" baseLine="9710" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="9557" r="6581" b="9715">other</wd>

<space/>

<wd l="6638" t="9557" r="7493" b="9715">automatic</wd>

<space/>

<wd l="7546" t="9557" r="8496" b="9758">processing</wd>

<space/>

<wd l="8549" t="9557" r="9034" b="9715">tasks.</wd>

</ln>

</para>

<para l="6125" t="9811" r="10493" b="13051" alignment="justified" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="6350" t="9811" r="10483" b="10013" baseLine="9965" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="9811" r="7397" b="10013">Approaches</wd>

<space/>

<wd l="7555" t="9830" r="7723" b="9970">to</wd>

<space/>

<wd l="7882" t="9830" r="8213" b="9970">text</wd>

<space/>

<wd l="8371" t="9811" r="9590" b="9970">normalization</wd>

<space/>

<wd l="9749" t="9859" r="10123" b="10013">may</wd>

<space/>

<wd l="10272" t="9811" r="10483" b="9970">be</wd>

<space/>

</ln>

<ln l="6125" t="10066" r="10488" b="10267" baseLine="10214" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="10066" r="6811" b="10267">roughly</wd>

<space/>

<wd l="6960" t="10066" r="7622" b="10224">divided</wd>

<space/>

<wd l="7771" t="10066" r="8107" b="10224">into</wd>

<space/>

<wd l="8256" t="10085" r="8582" b="10224">two</wd>

<space/>

<wd l="8741" t="10114" r="9379" b="10267">groups:</wd>

<space/>

<wd l="9547" t="10066" r="10008" b="10224">those</wd>

<space/>

<wd l="10157" t="10066" r="10488" b="10224">that</wd>

<space/>

</ln>

<ln l="6130" t="10320" r="10493" b="10522" baseLine="10469">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6130" t="10320" r="7066" b="10478">“translate”</wd>

<space/>

</run>

<wd l="7166" t="10320" r="8318" b="10478"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">non</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">-standard</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="8414" t="10320" r="9202" b="10522">language</wd>

<space/>

<wd l="9302" t="10320" r="9638" b="10478">into</wd>

<space/>

<wd l="9754" t="10320" r="10493" b="10478">standard</wd>

<space/>

</run>

</ln>

<ln l="6130" t="10570" r="10488" b="10771" baseLine="10723" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="10570" r="6917" b="10771">language</wd>

<space/>

<wd l="6989" t="10570" r="7464" b="10771">using</wd>

<space/>

<wd l="7536" t="10570" r="8448" b="10728">contextual</wd>

<space/>

<wd l="8525" t="10570" r="9557" b="10728">information</wd>

<space/>

<wd l="9629" t="10570" r="10205" b="10771">(based</wd>

<space/>

<wd l="10272" t="10618" r="10488" b="10728">on</wd>

<space/>

</ln>

<ln l="6130" t="10824" r="10488" b="11026" baseLine="10978" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="10824" r="6917" b="11026">language</wd>

<space/>

<wd l="7018" t="10824" r="7776" b="11026">models),</wd>

<space/>

<wd l="7882" t="10824" r="8203" b="10982">and</wd>

<space/>

<wd l="8294" t="10824" r="8755" b="10982">those</wd>

<space/>

<wd l="8851" t="10824" r="9182" b="10982">that</wd>

<space/>

<wd l="9278" t="10824" r="9912" b="11026">replace</wd>

<space/>

<wd l="10018" t="10824" r="10488" b="10982">OOV</wd>

<space/>

</ln>

<ln l="6125" t="11078" r="10488" b="11280" baseLine="11227" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="11078" r="6658" b="11237">words</wd>

<space/>

<wd l="6778" t="11078" r="8074" b="11280">(lexical-based)</wd>

<space/>

<wd l="8189" t="11078" r="8414" b="11280">by</wd>

<space/>

<wd l="8530" t="11078" r="9202" b="11237">suitable</wd>

<space/>

<wd l="9322" t="11078" r="9821" b="11237">forms</wd>

<space/>

<wd l="9941" t="11078" r="10109" b="11232">in</wd>

<space/>

<wd l="10219" t="11078" r="10488" b="11237">the</wd>

<space/>

</ln>

<ln l="6134" t="11328" r="10488" b="11530" baseLine="11482" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="11328" r="6878" b="11486">standard</wd>

<space/>

<wd l="7094" t="11328" r="7925" b="11530">language.</wd>

<space/>

<wd l="8160" t="11333" r="8467" b="11486">For</wd>

<space/>

<wd l="8688" t="11328" r="8957" b="11486">the</wd>

<space/>

<wd l="9182" t="11328" r="9677" b="11520">latter,</wd>

<space/>

<wd l="9907" t="11328" r="10488" b="11486">lexical</wd>

<space/>

</ln>

<ln l="6130" t="11582" r="10483" b="11784" baseLine="11736" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="11582" r="7162" b="11741">information</wd>

<space/>

<wd l="7253" t="11582" r="7392" b="11741">is</wd>

<space/>

<wd l="7488" t="11582" r="8290" b="11774">essential;</wd>

<space/>

<wd l="8390" t="11582" r="8645" b="11741">for</wd>

<space/>

<wd l="8726" t="11582" r="8995" b="11741">the</wd>

<space/>

<wd l="9091" t="11582" r="9730" b="11774">former,</wd>

<space/>

<wd l="9821" t="11582" r="10483" b="11784">parallel</wd>

<space/>

</ln>

<ln l="6130" t="11837" r="10488" b="12038" baseLine="11986" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="11885" r="6802" b="12038">corpora</wd>

<space/>

<wd l="6902" t="11837" r="7099" b="11995">of</wd>

<space/>

<wd l="7181" t="11837" r="8338" b="11995">non-standard</wd>

<space/>

<wd l="8438" t="11837" r="8755" b="11995">and</wd>

<space/>

<wd l="8861" t="11837" r="9600" b="11995">standard</wd>

<space/>

<wd l="9701" t="11837" r="10488" b="12038">language</wd>

<space/>

</ln>

<ln l="6130" t="12091" r="10488" b="12293" baseLine="12240" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="12139" r="6394" b="12250">are</wd>

<space/>

<wd l="6600" t="12091" r="7373" b="12293">required.</wd>

<space/>

<wd l="7589" t="12091" r="8827" b="12250">Lexical-based</wd>

<space/>

<wd l="9034" t="12091" r="10013" b="12293">approaches</wd>

<space/>

<wd l="10224" t="12139" r="10488" b="12250">are</wd>

<space/>

</ln>

<ln l="6130" t="12341" r="10478" b="12542" baseLine="12494" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="12341" r="7066" b="12542">commonly</wd>

<space/>

<wd l="7234" t="12341" r="7642" b="12499">used</wd>

<space/>

<wd l="7805" t="12360" r="7973" b="12499">to</wd>

<space/>

<wd l="8150" t="12341" r="9024" b="12499">normalize</wd>

<space/>

<wd l="9202" t="12341" r="9845" b="12542">general</wd>

<space/>

<wd l="10018" t="12360" r="10478" b="12533">texts,</wd>

<space/>

</ln>

<ln l="6125" t="12595" r="10488" b="12797" baseLine="12744" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="12595" r="6840" b="12754">whereas</wd>

<space/>

<wd l="7061" t="12595" r="8808" b="12754">machine-translation</wd>

<space/>

<wd l="9024" t="12595" r="10003" b="12797">approaches</wd>

<space/>

<wd l="10224" t="12643" r="10488" b="12754">are</wd>

<space/>

</ln>

<ln l="6125" t="12850" r="10262" b="13051" baseLine="12998" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="12850" r="6763" b="13051">usually</wd>

<space/>

<wd l="6821" t="12898" r="7018" b="13008">an</wd>

<space/>

<wd l="7080" t="12850" r="7642" b="13051">option</wd>

<space/>

<wd l="7694" t="12869" r="7862" b="13008">to</wd>

<space/>

<wd l="7920" t="12850" r="8443" b="13008">tackle</wd>

<space/>

<wd l="8510" t="12850" r="8933" b="13008">SMS</wd>

<space/>

<wd l="8995" t="12850" r="10262" b="13008">normalization.</wd>

</ln>

</para>

<para l="6125" t="13099" r="10493" b="13810" alignment="justified" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="6350" t="13099" r="10478" b="13301" baseLine="13253" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="13099" r="6667" b="13258">Aw</wd>

<space/>

<wd l="6739" t="13118" r="6893" b="13258">et</wd>

<space/>

<wd l="6965" t="13099" r="7162" b="13258">al.</wd>

<space/>

<wd l="7243" t="13099" r="7819" b="13301">(2006)</wd>

<space/>

<wd l="7896" t="13099" r="8246" b="13258">first</wd>

<space/>

<wd l="8309" t="13099" r="9120" b="13301">proposed</wd>

<space/>

<wd l="9182" t="13118" r="9350" b="13258">to</wd>

<space/>

<wd l="9418" t="13099" r="9984" b="13301">regard</wd>

<space/>

<wd l="10056" t="13099" r="10478" b="13258">SMS</wd>

<space/>

</ln>

<ln l="6125" t="13354" r="10474" b="13555" baseLine="13507" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="13354" r="7344" b="13512">normalization</wd>

<space/>

<wd l="7430" t="13402" r="7603" b="13512">as</wd>

<space/>

<wd l="7694" t="13402" r="7790" b="13512">a</wd>

<space/>

<wd l="7867" t="13354" r="8606" b="13512">machine</wd>

<space/>

<wd l="8688" t="13354" r="9619" b="13512">translation</wd>

<space/>

<wd l="9691" t="13354" r="10474" b="13555">problem.</wd>

<space/>

</ln>

<ln l="6125" t="13608" r="10493" b="13810" baseLine="13757" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="13613" r="6638" b="13810">Many</wd>

<space/>

<wd l="6720" t="13608" r="7171" b="13766">other</wd>

<space/>

<wd l="7258" t="13608" r="7853" b="13766">studies</wd>

<space/>

<wd l="7934" t="13608" r="8347" b="13766">have</wd>

<space/>

<wd l="8429" t="13608" r="9211" b="13766">followed</wd>

<space/>

<wd l="9288" t="13608" r="9600" b="13766">this</wd>

<space/>

<wd l="9686" t="13608" r="10493" b="13810">approach</wd>

</ln>

</para>

<para l="6125" t="14146" r="10488" b="14938" alignment="justified" spaceBefore="301" lsp="exactly" lspExact="204" language="en">

<ln l="6125" t="14146" r="10483" b="14318" baseLine="14270" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="14146" r="6950" b="14318">recognizers</wd>

<space/>

<wd l="7056" t="14146" r="7330" b="14280">that</wd>

<space/>

<wd l="7435" t="14146" r="7752" b="14318">only</wd>

<space/>

<wd l="7853" t="14146" r="8549" b="14318">recognize</wd>

<space/>

<wd l="8659" t="14146" r="9096" b="14280">words</wd>

<space/>

<wd l="9211" t="14146" r="9346" b="14275">in</wd>

<space/>

<wd l="9456" t="14189" r="9533" b="14280">a</wd>

<space/>

<wd l="9638" t="14146" r="10003" b="14280">fixed</wd>

<space/>

<wd l="10114" t="14146" r="10483" b="14280">finite</wd>

<space/>

</ln>

<ln l="6125" t="14352" r="10488" b="14525" baseLine="14477">

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6125" t="14352" r="7042" b="14525">vocabulary.”</wd>

<space/>

<wd l="7195" t="14362" r="7426" b="14486">IN:</wd>

<space/>

<wd l="7589" t="14362" r="7973" b="14525">Long</wd>

<space/>

<wd l="8126" t="14352" r="8429" b="14520">Qin.</wd>

<space/>

<wd l="8592" t="14357" r="8986" b="14486">2013.</wd>

<space/>

<wd l="9149" t="14352" r="9797" b="14525">Learning</wd>

<space/>

</run>

<wd l="9950" t="14357" r="10488" b="14486"><run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Out</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">-of-</run>

</wd>

</ln>

<ln l="6125" t="14558" r="10488" b="14731" baseLine="14688" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="14558" r="6965" b="14726">Vocabulary</wd>

<space/>

<wd l="7032" t="14558" r="7507" b="14693">Words</wd>

<space/>

<wd l="7584" t="14558" r="7723" b="14688">in</wd>

<space/>

<wd l="7790" t="14558" r="8549" b="14693">Automatic</wd>

<space/>

<wd l="8630" t="14558" r="9139" b="14726">Speech</wd>

<space/>

<wd l="9211" t="14558" r="10123" b="14731">Recognition.</wd>

<space/>

<wd l="10205" t="14558" r="10488" b="14693">Phd</wd>

<space/>

</ln>

<ln l="6130" t="14765" r="9187" b="14938" baseLine="14894" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="14765" r="6624" b="14899">Thesis.</wd>

<space/>

<wd l="6686" t="14765" r="7330" b="14938">Carnegie</wd>

<space/>

<wd l="7378" t="14765" r="7901" b="14899">Mellon</wd>

<space/>

<wd l="7944" t="14765" r="8731" b="14938">University.</wd>

<space/>

<wd l="8789" t="14770" r="9187" b="14899">2013.</wd>

</ln>

</para>

</column>

</section>

<dd l="1416" t="15736" r="10517" b="15977">

<para l="5804" t="15792" r="6143" b="15946" alignment="centered" spaceBefore="4" lsp="exactly" lspExact="229" language="en">

<ln l="5870" t="15792" r="6077" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="39">

<wd l="5870" t="15792" r="6077" b="15946">39</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4305.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1416" marginTop="1417" marginRight="1392" marginBottom="1302" offsetX="-10" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1416" t="1417" r="10517" b="15416">

<column l="1416" t="1417" r="5813" b="15416">

<para l="1421" t="1464" r="5784" b="4195" alignment="justified" lsp="exactly" lspExact="252" language="en">

<ln l="1426" t="1464" r="5770" b="1666" baseLine="1613" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="1464" r="2438" b="1666">(Contractor</wd>

<space/>

<wd l="2539" t="1483" r="2698" b="1622">et</wd>

<space/>

<wd l="2803" t="1464" r="3062" b="1656">al.,</wd>

<space/>

<wd l="3178" t="1464" r="3658" b="1656">2010;</wd>

<space/>

<wd l="3782" t="1464" r="4546" b="1666">Schlippe</wd>

<space/>

<wd l="4656" t="1483" r="4810" b="1622">et</wd>

<space/>

<wd l="4920" t="1464" r="5174" b="1656">al.,</wd>

<space/>

<wd l="5290" t="1464" r="5770" b="1656">2012;</wd>

<space/>

</ln>

<ln l="1421" t="1718" r="5779" b="1920" baseLine="1867" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="1718" r="1834" b="1910">Bali,</wd>

<space/>

<wd l="1915" t="1718" r="2400" b="1910">2013,</wd>

<space/>

<wd l="2477" t="1738" r="2645" b="1877">to</wd>

<space/>

<wd l="2722" t="1718" r="3034" b="1877">cite</wd>

<space/>

<wd l="3086" t="1718" r="3422" b="1920">just</wd>

<space/>

<wd l="3499" t="1766" r="3595" b="1877">a</wd>

<space/>

<wd l="3667" t="1718" r="4104" b="1920">few).</wd>

<space/>

<wd l="4190" t="1718" r="4642" b="1920">They</wd>

<space/>

<wd l="4714" t="1718" r="5203" b="1877">differ</wd>

<space/>

<wd l="5275" t="1718" r="5443" b="1872">in</wd>

<space/>

<wd l="5510" t="1718" r="5779" b="1877">the</wd>

<space/>

</ln>

<ln l="1421" t="1968" r="5784" b="2170" baseLine="2122" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="1968" r="2160" b="2126">machine</wd>

<space/>

<wd l="2251" t="1968" r="3178" b="2126">translation</wd>

<space/>

<wd l="3264" t="1968" r="4118" b="2170">technique</wd>

<space/>

<wd l="4210" t="1968" r="4906" b="2170">adopted</wd>

<space/>

<wd l="4992" t="2016" r="5174" b="2126">or</wd>

<space/>

<wd l="5266" t="1968" r="5434" b="2122">in</wd>

<space/>

<wd l="5515" t="1968" r="5784" b="2126">the</wd>

<space/>

</ln>

<ln l="1421" t="2222" r="5784" b="2424" baseLine="2376" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="2222" r="2083" b="2381">method</wd>

<space/>

<wd l="2194" t="2222" r="2602" b="2381">used</wd>

<space/>

<wd l="2712" t="2242" r="2880" b="2381">to</wd>

<space/>

<wd l="3005" t="2222" r="3552" b="2381">obtain</wd>

<space/>

<wd l="3667" t="2222" r="3931" b="2381">the</wd>

<space/>

<wd l="4046" t="2222" r="4704" b="2424">parallel</wd>

<space/>

<wd l="4829" t="2270" r="5405" b="2424">corpus</wd>

<space/>

<wd l="5530" t="2222" r="5784" b="2381">for</wd>

<space/>

</ln>

<ln l="1421" t="2477" r="5774" b="2678" baseLine="2626" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="2477" r="2102" b="2678">training</wd>

<space/>

<wd l="2294" t="2477" r="2611" b="2635">and</wd>

<space/>

<wd l="2798" t="2477" r="3754" b="2635">evaluation.</wd>

<space/>

<wd l="3950" t="2477" r="4267" b="2635">Aw</wd>

<space/>

<wd l="4459" t="2496" r="4613" b="2635">et</wd>

<space/>

<wd l="4805" t="2477" r="5002" b="2635">al.</wd>

<space/>

<wd l="5203" t="2477" r="5774" b="2678">(2006)</wd>

<space/>

</ln>

<ln l="1426" t="2726" r="5779" b="2928" baseLine="2880" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="2726" r="2438" b="2885">constructed</wd>

<space/>

<wd l="2544" t="2774" r="2640" b="2885">a</wd>

<space/>

<wd l="2736" t="2726" r="3394" b="2928">parallel</wd>

<space/>

<wd l="3509" t="2774" r="4085" b="2928">corpus</wd>

<space/>

<wd l="4195" t="2726" r="4589" b="2885">with</wd>

<space/>

<wd l="4699" t="2726" r="5184" b="2918">5,000</wd>

<space/>

<wd l="5304" t="2726" r="5779" b="2918">SMS,</wd>

<space/>

</ln>

<ln l="1426" t="2981" r="5779" b="3182" baseLine="3134" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="2981" r="2362" b="3139">Contractor</wd>

<space/>

<wd l="2539" t="3000" r="2693" b="3139">et</wd>

<space/>

<wd l="2870" t="2981" r="3067" b="3139">al.</wd>

<space/>

<wd l="3254" t="2981" r="3830" b="3182">(2010)</wd>

<space/>

<wd l="4013" t="2981" r="4872" b="3182">generated</wd>

<space/>

<wd l="5040" t="2981" r="5779" b="3139">artificial</wd>

<space/>

</ln>

<ln l="1426" t="3235" r="5779" b="3394" baseLine="3384" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="3235" r="2078" b="3394">“clean”</wd>

<space/>

<wd l="2290" t="3254" r="3120" b="3394">sentences</wd>

<space/>

<wd l="3331" t="3235" r="3499" b="3389">in</wd>

<space/>

<wd l="3706" t="3283" r="3802" b="3394">a</wd>

<space/>

<wd l="4013" t="3235" r="4829" b="3394">statistical</wd>

<space/>

<wd l="5035" t="3235" r="5779" b="3394">machine</wd>

<space/>

</ln>

<ln l="1421" t="3490" r="5779" b="3691" baseLine="3638" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="3490" r="2347" b="3648">translation</wd>

<space/>

<wd l="2438" t="3490" r="3293" b="3691">approach,</wd>

<space/>

<wd l="3389" t="3490" r="3710" b="3648">and</wd>

<space/>

<wd l="3797" t="3490" r="4560" b="3691">Schlippe</wd>

<space/>

<wd l="4651" t="3509" r="4810" b="3648">et</wd>

<space/>

<wd l="4901" t="3490" r="5098" b="3648">al.</wd>

<space/>

<wd l="5203" t="3490" r="5779" b="3691">(2012)</wd>

<space/>

</ln>

<ln l="1426" t="3739" r="5779" b="3941" baseLine="3893" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="3739" r="2438" b="3898">constructed</wd>

<space/>

<wd l="2486" t="3787" r="2582" b="3898">a</wd>

<space/>

<wd l="2626" t="3739" r="2990" b="3898">web</wd>

<space/>

<wd l="3043" t="3739" r="3806" b="3898">interface</wd>

<space/>

<wd l="3854" t="3758" r="4022" b="3898">to</wd>

<space/>

<wd l="4075" t="3739" r="4709" b="3898">receive</wd>

<space/>

<wd l="4766" t="3739" r="5779" b="3941">suggestions</wd>

<space/>

</ln>

<ln l="1426" t="3994" r="4622" b="4195" baseLine="4142" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="3994" r="1627" b="4152">of</wd>

<space/>

<wd l="1661" t="3994" r="2117" b="4152">clean</wd>

<space/>

<wd l="2179" t="3994" r="2904" b="4152">versions</wd>

<space/>

<wd l="2971" t="3994" r="3168" b="4152">of</wd>

<space/>

<wd l="3202" t="3994" r="3682" b="4195">noisy</wd>

<space/>

<wd l="3744" t="4013" r="4622" b="4152">sentences.</wd>

</ln>

</para>

<para l="1421" t="4248" r="5794" b="10224" alignment="justified" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="1646" t="4248" r="5789" b="4450" baseLine="4397" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="4253" r="2160" b="4450">Many</wd>

<space/>

<wd l="2237" t="4248" r="2837" b="4406">studies</wd>

<space/>

<wd l="2909" t="4248" r="3322" b="4406">have</wd>

<space/>

<wd l="3398" t="4248" r="4094" b="4450">adopted</wd>

<space/>

<wd l="4162" t="4296" r="4258" b="4406">a</wd>

<space/>

<wd l="4330" t="4248" r="4906" b="4406">lexical</wd>

<space/>

<wd l="4987" t="4248" r="5789" b="4450">approach</wd>

<space/>

</ln>

<ln l="1421" t="4498" r="5774" b="4699" baseLine="4651" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="4517" r="1584" b="4656">to</wd>

<space/>

<wd l="1675" t="4498" r="2938" b="4656">normalization.</wd>

<space/>

<wd l="3034" t="4502" r="3341" b="4656">For</wd>

<space/>

<wd l="3427" t="4498" r="4210" b="4699">example,</wd>

<space/>

<wd l="4306" t="4498" r="4608" b="4656">Liu</wd>

<space/>

<wd l="4699" t="4517" r="4853" b="4656">et</wd>

<space/>

<wd l="4939" t="4498" r="5198" b="4690">al.,</wd>

<space/>

<wd l="5290" t="4498" r="5774" b="4690">2011,</wd>

<space/>

</ln>

<ln l="1426" t="4752" r="5779" b="4954" baseLine="4906" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="4752" r="2030" b="4954">aiming</wd>

<space/>

<wd l="2074" t="4771" r="2242" b="4910">to</wd>

<space/>

<wd l="2290" t="4752" r="2813" b="4910">tackle</wd>

<space/>

<wd l="2870" t="4752" r="3293" b="4910">SMS</wd>

<space/>

<wd l="3346" t="4752" r="4613" b="4944">normalization,</wd>

<space/>

<wd l="4661" t="4752" r="5477" b="4954">proposed</wd>

<space/>

<wd l="5515" t="4752" r="5779" b="4910">the</wd>

<space/>

</ln>

<ln l="1426" t="5006" r="5784" b="5208" baseLine="5155" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="5006" r="2347" b="5208">generation</wd>

<space/>

<wd l="2434" t="5006" r="2630" b="5165">of</wd>

<space/>

<wd l="2688" t="5006" r="3768" b="5165">nonstandard</wd>

<space/>

<wd l="3840" t="5006" r="4411" b="5165">tokens</wd>

<space/>

<wd l="4493" t="5006" r="4718" b="5208">by</wd>

<space/>

<wd l="4786" t="5006" r="5784" b="5208">performing</wd>

<space/>

</ln>

<ln l="1426" t="5261" r="5779" b="5462" baseLine="5410" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="5261" r="1872" b="5419">letter</wd>

<space/>

<wd l="1915" t="5261" r="3211" b="5419">transformation</wd>

<space/>

<wd l="3264" t="5309" r="3480" b="5419">on</wd>

<space/>

<wd l="3523" t="5261" r="3792" b="5419">the</wd>

<space/>

<wd l="3845" t="5261" r="4733" b="5462">dictionary</wd>

<space/>

<wd l="4776" t="5261" r="5362" b="5419">words.</wd>

<space/>

<wd l="5419" t="5266" r="5779" b="5419">Han</wd>

<space/>

</ln>

<ln l="1426" t="5510" r="5779" b="5712" baseLine="5664" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="5530" r="1579" b="5669">et</wd>

<space/>

<wd l="1637" t="5510" r="1834" b="5669">al.</wd>

<space/>

<wd l="1906" t="5510" r="2482" b="5712">(2013)</wd>

<space/>

<wd l="2549" t="5510" r="3341" b="5669">observed</wd>

<space/>

<wd l="3394" t="5510" r="3725" b="5669">that</wd>

<space/>

<wd l="3778" t="5530" r="4205" b="5669">most</wd>

<space/>

<wd l="4262" t="5510" r="5160" b="5669">ill-formed</wd>

<space/>

<wd l="5208" t="5510" r="5779" b="5669">tokens</wd>

<space/>

</ln>

<ln l="1426" t="5765" r="5779" b="5966" baseLine="5914" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="5765" r="1589" b="5918">in</wd>

<space/>

<wd l="1651" t="5765" r="2299" b="5923">Twitter</wd>

<space/>

<wd l="2357" t="5813" r="2621" b="5923">are</wd>

<space/>

<wd l="2678" t="5765" r="4560" b="5966">morphophonemically</wd>

<space/>

<wd l="4627" t="5765" r="5232" b="5923">similar</wd>

<space/>

<wd l="5285" t="5784" r="5453" b="5923">to</wd>

<space/>

<wd l="5515" t="5765" r="5779" b="5923">the</wd>

<space/>

</ln>

<ln l="1421" t="6019" r="5779" b="6221" baseLine="6168" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="6019" r="2309" b="6221">respective</wd>

<space/>

<wd l="2381" t="6038" r="2986" b="6178">correct</wd>

<space/>

<wd l="3053" t="6019" r="3605" b="6178">forms.</wd>

<space/>

<wd l="3682" t="6019" r="4224" b="6178">Based</wd>

<space/>

<wd l="4286" t="6067" r="4502" b="6178">on</wd>

<space/>

<wd l="4565" t="6019" r="4882" b="6178">this</wd>

<space/>

<wd l="4954" t="6019" r="5779" b="6211">evidence,</wd>

<space/>

</ln>

<ln l="1421" t="6269" r="5779" b="6470" baseLine="6422" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="6269" r="1800" b="6470">they</wd>

<space/>

<wd l="2016" t="6269" r="2837" b="6470">proposed</wd>

<space/>

<wd l="3058" t="6317" r="3264" b="6427">an</wd>

<space/>

<wd l="3494" t="6269" r="4349" b="6427">automatic</wd>

<space/>

<wd l="4584" t="6269" r="5390" b="6470">approach</wd>

<space/>

<wd l="5611" t="6288" r="5779" b="6427">to</wd>

<space/>

</ln>

<ln l="1426" t="6523" r="5784" b="6725" baseLine="6677" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="6523" r="2510" b="6725">constructing</wd>

<space/>

<wd l="2582" t="6571" r="2678" b="6682">a</wd>

<space/>

<wd l="2755" t="6542" r="2995" b="6682">set</wd>

<space/>

<wd l="3072" t="6523" r="3269" b="6682">of</wd>

<space/>

<wd l="3322" t="6523" r="3778" b="6682">word</wd>

<space/>

<wd l="3845" t="6523" r="4536" b="6682">variants</wd>

<space/>

<wd l="4608" t="6523" r="4834" b="6725">by</wd>

<space/>

<wd l="4901" t="6523" r="5381" b="6725">using</wd>

<space/>

<wd l="5458" t="6523" r="5784" b="6682">edit</wd>

<space/>

</ln>

<ln l="1426" t="6778" r="5784" b="6979" baseLine="6926" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="6778" r="2136" b="6936">distance</wd>

<space/>

<wd l="2198" t="6778" r="2520" b="6936">and</wd>

<space/>

<wd l="2563" t="6778" r="3427" b="6979">phonemic</wd>

<space/>

<wd l="3490" t="6778" r="4651" b="6979">transcription;</wd>

<space/>

<wd l="4723" t="6778" r="5342" b="6979">finally,</wd>

<space/>

<wd l="5405" t="6778" r="5784" b="6979">they</wd>

<space/>

</ln>

<ln l="1421" t="7027" r="5784" b="7229" baseLine="7181" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="7027" r="2021" b="7186">ranked</wd>

<space/>

<wd l="2107" t="7027" r="2376" b="7186">the</wd>

<space/>

<wd l="2477" t="7027" r="3394" b="7186">candidates</wd>

<space/>

<wd l="3490" t="7027" r="3965" b="7229">using</wd>

<space/>

<wd l="4061" t="7075" r="4157" b="7186">a</wd>

<space/>

<wd l="4248" t="7027" r="4896" b="7229">trigram</wd>

<space/>

<wd l="4992" t="7027" r="5784" b="7229">language</wd>

<space/>

</ln>

<ln l="1421" t="7282" r="5789" b="7483" baseLine="7435" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="7282" r="2011" b="7440">model.</wd>

<space/>

<wd l="2174" t="7286" r="3062" b="7483">Mosquera</wd>

<space/>

<wd l="3216" t="7301" r="3370" b="7440">et</wd>

<space/>

<wd l="3528" t="7282" r="3725" b="7440">al.</wd>

<space/>

<wd l="3893" t="7282" r="4469" b="7483">(2012)</wd>

<space/>

<wd l="4637" t="7282" r="5539" b="7483">developed</wd>

<space/>

<wd l="5693" t="7330" r="5789" b="7440">a</wd>

<space/>

</ln>

<ln l="1421" t="7536" r="5789" b="7738" baseLine="7685" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="7536" r="2491" b="7738">multilingual</wd>

<space/>

<wd l="2563" t="7536" r="3725" b="7694">lexical-based</wd>

<space/>

<wd l="3787" t="7536" r="4594" b="7738">approach</wd>

<space/>

<wd l="4661" t="7536" r="5405" b="7738">(English</wd>

<space/>

<wd l="5472" t="7536" r="5789" b="7694">and</wd>

<space/>

</ln>

<ln l="1430" t="7790" r="5774" b="7992" baseLine="7939" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="7790" r="2184" b="7992">Spanish)</wd>

<space/>

<wd l="2270" t="7810" r="2438" b="7949">to</wd>

<space/>

<wd l="2530" t="7790" r="3408" b="7949">normalize</wd>

<space/>

<wd l="3499" t="7790" r="4133" b="7992">general</wd>

<space/>

<wd l="4224" t="7810" r="4555" b="7949">text</wd>

<space/>

<wd l="4642" t="7790" r="5064" b="7949">from</wd>

<space/>

<wd l="5150" t="7838" r="5246" b="7949">a</wd>

<space/>

<wd l="5328" t="7838" r="5774" b="7949">news</wd>

<space/>

</ln>

<ln l="1426" t="8040" r="5774" b="8242" baseLine="8194" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="8088" r="2050" b="8242">corpus.</wd>

<space/>

<wd l="2198" t="8040" r="2539" b="8198">The</wd>

<space/>

<wd l="2678" t="8040" r="3658" b="8242">approaches</wd>

<space/>

<wd l="3802" t="8040" r="4003" b="8198">of</wd>

<space/>

<wd l="4118" t="8040" r="5146" b="8242">Ringlstetter</wd>

<space/>

<wd l="5280" t="8059" r="5438" b="8198">et</wd>

<space/>

<wd l="5578" t="8040" r="5774" b="8198">al.</wd>

<space/>

</ln>

<ln l="1426" t="8294" r="5789" b="8496" baseLine="8443" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="8294" r="2054" b="8496">(2006),</wd>

<space/>

<wd l="2165" t="8294" r="2654" b="8453">Clark</wd>

<space/>

<wd l="2750" t="8294" r="3067" b="8453">and</wd>

<space/>

<wd l="3163" t="8294" r="3658" b="8453">Araki</wd>

<space/>

<wd l="3763" t="8294" r="4397" b="8496">(2011),</wd>

<space/>

<wd l="4507" t="8294" r="4824" b="8453">and</wd>

<space/>

<wd l="4920" t="8294" r="5789" b="8453">Bildhauer</wd>

<space/>

</ln>

<ln l="1426" t="8549" r="5784" b="8750" baseLine="8698" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="8549" r="1742" b="8707">and</wd>

<space/>

<wd l="1838" t="8549" r="2506" b="8707">Schäfer</wd>

<space/>

<wd l="2597" t="8549" r="3173" b="8750">(2013)</wd>

<space/>

<wd l="3278" t="8597" r="3542" b="8707">are</wd>

<space/>

<wd l="3643" t="8549" r="4248" b="8707">similar</wd>

<space/>

<wd l="4339" t="8568" r="4502" b="8707">to</wd>

<space/>

<wd l="4608" t="8597" r="5030" b="8741">ours,</wd>

<space/>

<wd l="5136" t="8597" r="5309" b="8707">as</wd>

<space/>

<wd l="5405" t="8549" r="5784" b="8750">they</wd>

<space/>

</ln>

<ln l="1421" t="8798" r="5779" b="9000" baseLine="8952" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="8798" r="1987" b="9000">regard</wd>

<space/>

<wd l="2035" t="8798" r="3259" b="8957">normalization</wd>

<space/>

<wd l="3312" t="8846" r="3490" b="8957">as</wd>

<space/>

<wd l="3552" t="8846" r="3648" b="8957">a</wd>

<space/>

<wd l="3696" t="8798" r="4368" b="8957">number</wd>

<space/>

<wd l="4426" t="8798" r="4627" b="8957">of</wd>

<space/>

<wd l="4670" t="8798" r="5779" b="9000">subproblems</wd>

<space/>

</ln>

<ln l="1421" t="9053" r="5794" b="9254" baseLine="9206" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="9072" r="1584" b="9211">to</wd>

<space/>

<wd l="1757" t="9053" r="1968" b="9211">be</wd>

<space/>

<wd l="2150" t="9053" r="2722" b="9211">solved</wd>

<space/>

<wd l="2894" t="9053" r="3062" b="9206">in</wd>

<space/>

<wd l="3245" t="9101" r="4085" b="9254">sequence.</wd>

<space/>

<wd l="4272" t="9058" r="4450" b="9206">In</wd>

<space/>

<wd l="4627" t="9053" r="5794" b="9211">lexical-based</wd>

<space/>

</ln>

<ln l="1426" t="9307" r="5774" b="9509" baseLine="9456" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="9307" r="2405" b="9509">approaches</wd>

<space/>

<wd l="2554" t="9326" r="2722" b="9466">to</wd>

<space/>

<wd l="2870" t="9307" r="4094" b="9466">normalization</wd>

<space/>

<wd l="4243" t="9307" r="4445" b="9466">of</wd>

<space/>

<wd l="4570" t="9307" r="4930" b="9466">web</wd>

<space/>

<wd l="5088" t="9326" r="5774" b="9499">content,</wd>

<space/>

</ln>

<ln l="1426" t="9557" r="5779" b="9758" baseLine="9710" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="9557" r="2150" b="9715">lexicons</wd>

<space/>

<wd l="2299" t="9557" r="2683" b="9758">play</wd>

<space/>

<wd l="2837" t="9605" r="3043" b="9715">an</wd>

<space/>

<wd l="3197" t="9557" r="4046" b="9758">important</wd>

<space/>

<wd l="4200" t="9557" r="4541" b="9715">role</wd>

<space/>

<wd l="4694" t="9557" r="5016" b="9715">and</wd>

<space/>

<wd l="5160" t="9557" r="5779" b="9758">require</wd>

<space/>

</ln>

<ln l="1426" t="9811" r="5779" b="10013" baseLine="9965" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="9830" r="2155" b="9970">constant</wd>

<space/>

<wd l="2323" t="9811" r="3091" b="10013">updating</wd>

<space/>

<wd l="3259" t="9830" r="3427" b="9970">to</wd>

<space/>

<wd l="3605" t="9811" r="4013" b="10013">keep</wd>

<space/>

<wd l="4181" t="9859" r="4589" b="10013">pace</wd>

<space/>

<wd l="4762" t="9811" r="5150" b="9970">with</wd>

<space/>

<wd l="5318" t="9811" r="5779" b="9970">UGC</wd>

<space/>

</ln>

<ln l="1426" t="10066" r="2491" b="10224" baseLine="10214" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="10066" r="2491" b="10224">innovations.</wd>

</ln>

</para>

<para l="1421" t="10526" r="5779" b="11021" alignment="justified" li="360" spaceBefore="212" fli="-360" lsp="exactly" lspExact="274" language="en">

<ln l="1421" t="10526" r="5779" b="10699" baseLine="10694" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="10531" r="1594" b="10699">3.</wd>

<space/>

<wd l="1790" t="10531" r="3331" b="10699">Characteristics</wd>

<space/>

<wd l="3643" t="10531" r="3859" b="10699">of</wd>

<space/>

<wd l="4147" t="10531" r="5779" b="10699">User-Generated</wd>

<space/>

</ln>

<ln l="1790" t="10800" r="4565" b="11021" baseLine="10968" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1790" t="10805" r="2606" b="10973">Content</wd>

<space/>

<wd l="2669" t="10805" r="2861" b="10968">in</wd>

<space/>

<wd l="2923" t="10805" r="3739" b="11021">product</wd>

<space/>

<wd l="3802" t="10805" r="4565" b="10973">reviews</wd>

</ln>

</para>

<para l="1421" t="11232" r="5789" b="14722" alignment="justified" spaceBefore="153" spaceAfter="322" lsp="exactly" lspExact="253" language="en">

<ln l="1426" t="11232" r="5784" b="11390" baseLine="11381" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="11232" r="1762" b="11390">The</wd>

<space/>

<wd l="1872" t="11232" r="3120" b="11390">characteristics</wd>

<space/>

<wd l="3230" t="11280" r="3485" b="11390">we</wd>

<space/>

<wd l="3595" t="11232" r="4320" b="11390">describe</wd>

<space/>

<wd l="4430" t="11232" r="4598" b="11386">in</wd>

<space/>

<wd l="4704" t="11232" r="5016" b="11390">this</wd>

<space/>

<wd l="5131" t="11232" r="5784" b="11390">Section</wd>

<space/>

</ln>

<ln l="1421" t="11482" r="5784" b="11683" baseLine="11635" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="11482" r="1834" b="11640">have</wd>

<space/>

<wd l="1949" t="11482" r="2371" b="11640">been</wd>

<space/>

<wd l="2501" t="11482" r="3293" b="11640">observed</wd>

<space/>

<wd l="3418" t="11482" r="3586" b="11635">in</wd>

<space/>

<wd l="3706" t="11482" r="3970" b="11640">the</wd>

<space/>

<wd l="4099" t="11530" r="4680" b="11683">corpus</wd>

<space/>

<wd l="4810" t="11482" r="5011" b="11640">of</wd>

<space/>

<wd l="5102" t="11482" r="5784" b="11683">product</wd>

<space/>

</ln>

<ln l="1421" t="11736" r="5774" b="11938" baseLine="11885" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="11736" r="2102" b="11894">reviews</wd>

<space/>

<wd l="2155" t="11736" r="2947" b="11938">Buscapé,</wd>

<space/>

<wd l="3005" t="11736" r="3413" b="11894">built</wd>

<space/>

<wd l="3456" t="11736" r="3682" b="11938">by</wd>

<space/>

<wd l="3734" t="11741" r="4613" b="11894">Hartmann</wd>

<space/>

<wd l="4670" t="11755" r="4824" b="11894">et</wd>

<space/>

<wd l="4882" t="11736" r="5078" b="11894">al.</wd>

<space/>

<wd l="5146" t="11736" r="5774" b="11938">(2014).</wd>

<space/>

</ln>

<ln l="1426" t="11990" r="5779" b="12192" baseLine="12139" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="11990" r="1762" b="12149">The</wd>

<space/>

<wd l="1915" t="12038" r="2491" b="12192">corpus</wd>

<space/>

<wd l="2650" t="11990" r="2789" b="12149">is</wd>

<space/>

<wd l="2942" t="11990" r="3211" b="12149">the</wd>

<space/>

<wd l="3365" t="11990" r="3854" b="12149">result</wd>

<space/>

<wd l="4008" t="11990" r="4210" b="12149">of</wd>

<space/>

<wd l="4339" t="11990" r="5107" b="12192">crawling</wd>

<space/>

<wd l="5261" t="12038" r="5467" b="12149">an</wd>

<space/>

<wd l="5621" t="12038" r="5779" b="12149">e-</wd>

</ln>

<ln l="1426" t="12240" r="5779" b="12442" baseLine="12394" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="12288" r="2333" b="12398">commerce</wd>

<space/>

<wd l="2443" t="12240" r="2995" b="12398">search</wd>

<space/>

<wd l="3101" t="12240" r="3682" b="12442">engine</wd>

<space/>

<wd l="3782" t="12240" r="3984" b="12398">of</wd>

<space/>

<wd l="4070" t="12288" r="4512" b="12398">same</wd>

<space/>

<wd l="4613" t="12288" r="5136" b="12432">name,</wd>

<space/>

<wd l="5246" t="12240" r="5779" b="12398">where</wd>

<space/>

</ln>

<ln l="1421" t="12494" r="5770" b="12696" baseLine="12648" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="12542" r="1867" b="12653">users</wd>

<space/>

<wd l="1944" t="12542" r="2246" b="12653">can</wd>

<space/>

<wd l="2309" t="12514" r="2683" b="12696">post</wd>

<space/>

<wd l="2755" t="12514" r="3648" b="12653">comments</wd>

<space/>

<wd l="3725" t="12494" r="4210" b="12653">about</wd>

<space/>

<wd l="4286" t="12494" r="4896" b="12653">several</wd>

<space/>

<wd l="4968" t="12494" r="5770" b="12696">products.</wd>

<space/>

</ln>

<ln l="1426" t="12749" r="5770" b="12950" baseLine="12898" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="12749" r="1805" b="12907">This</wd>

<space/>

<wd l="1862" t="12797" r="2438" b="12950">corpus</wd>

<space/>

<wd l="2496" t="12749" r="3182" b="12907">consists</wd>

<space/>

<wd l="3240" t="12749" r="3442" b="12907">of</wd>

<space/>

<wd l="3475" t="12749" r="4066" b="12941">85,910</wd>

<space/>

<wd l="4118" t="12749" r="4848" b="12941">reviews,</wd>

<space/>

<wd l="4906" t="12749" r="5770" b="12941">4,097,905</wd>

<space/>

</ln>

<ln l="1421" t="13003" r="5779" b="13205" baseLine="13152" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="13003" r="1987" b="13162">tokens</wd>

<space/>

<wd l="2112" t="13003" r="2429" b="13162">and</wd>

<space/>

<wd l="2539" t="13003" r="3130" b="13195">90,513</wd>

<space/>

<wd l="3254" t="13022" r="3763" b="13205">types.</wd>

<space/>

<wd l="3883" t="13003" r="4354" b="13162">After</wd>

<space/>

<wd l="4464" t="13003" r="5304" b="13205">removing</wd>

<space/>

<wd l="5424" t="13022" r="5779" b="13205">stop</wd>

<space/>

</ln>

<ln l="1421" t="13253" r="5774" b="13454" baseLine="13406" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="13253" r="2006" b="13445">words,</wd>

<space/>

<wd l="2107" t="13253" r="2861" b="13411">numbers</wd>

<space/>

<wd l="2966" t="13253" r="3288" b="13411">and</wd>

<space/>

<wd l="3374" t="13253" r="4464" b="13454">punctuation,</wd>

<space/>

<wd l="4574" t="13253" r="4690" b="13411">it</wd>

<space/>

<wd l="4790" t="13253" r="5078" b="13411">has</wd>

<space/>

<wd l="5189" t="13253" r="5774" b="13445">63,917</wd>

<space/>

</ln>

<ln l="1421" t="13507" r="5789" b="13709" baseLine="13656" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="13526" r="1930" b="13709">types,</wd>

<space/>

<wd l="1987" t="13507" r="2410" b="13666">from</wd>

<space/>

<wd l="2448" t="13507" r="2990" b="13666">which</wd>

<space/>

<wd l="3034" t="13507" r="3634" b="13699">34,774</wd>

<space/>

<wd l="3686" t="13555" r="3946" b="13666">are</wd>

<space/>

<wd l="3998" t="13507" r="4464" b="13666">OOV</wd>

<space/>

<wd l="4512" t="13507" r="5093" b="13666">words.</wd>

<space/>

<wd l="5146" t="13512" r="5386" b="13666">To</wd>

<space/>

<wd l="5434" t="13507" r="5789" b="13666">find</wd>

<space/>

</ln>

<ln l="1426" t="13762" r="5779" b="13954" baseLine="13910" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="13762" r="1891" b="13920">OOV</wd>

<space/>

<wd l="2016" t="13762" r="2606" b="13954">words,</wd>

<space/>

<wd l="2736" t="13810" r="2990" b="13920">we</wd>

<space/>

<wd l="3115" t="13762" r="3523" b="13920">used</wd>

<space/>

<wd l="3638" t="13762" r="4627" b="13954">Unitex-PB,</wd>

<space/>

<wd l="4762" t="13810" r="4858" b="13920">a</wd>

<space/>

<wd l="4973" t="13762" r="5779" b="13920">Brazilian</wd>

<space/>

</ln>

<ln l="1421" t="14011" r="5779" b="14213" baseLine="14165" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="14016" r="2395" b="14213">Portuguese</wd>

<space/>

<wd l="2486" t="14011" r="3130" b="14170">lexicon</wd>

<space/>

<wd l="3221" t="14011" r="3859" b="14213">(Muniz</wd>

<space/>

<wd l="3950" t="14030" r="4147" b="14170">et.</wd>

<space/>

<wd l="4248" t="14011" r="4450" b="14170">al.</wd>

<space/>

<wd l="4546" t="14011" r="5098" b="14213">2005).</wd>

<space/>

<wd l="5194" t="14011" r="5779" b="14170">Words</wd>

<space/>

</ln>

<ln l="1421" t="14266" r="5784" b="14467" baseLine="14419" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="14266" r="1747" b="14424">that</wd>

<space/>

<wd l="1891" t="14266" r="2290" b="14424">miss</wd>

<space/>

<wd l="2438" t="14314" r="2534" b="14424">a</wd>

<space/>

<wd l="2678" t="14266" r="3389" b="14424">diacritic</wd>

<space/>

<wd l="3542" t="14266" r="4099" b="14467">(3,652</wd>

<space/>

<wd l="4253" t="14314" r="4435" b="14424">or</wd>

<space/>

<wd l="4594" t="14266" r="5203" b="14467">10.2%)</wd>

<space/>

<wd l="5357" t="14314" r="5784" b="14424">were</wd>

<space/>

</ln>

<ln l="1426" t="14520" r="5784" b="14722" baseLine="14669" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="14520" r="2616" b="14722">automatically</wd>

<space/>

<wd l="2784" t="14520" r="3643" b="14678">corrected.</wd>

<space/>

<wd l="3816" t="14525" r="4296" b="14678">From</wd>

<space/>

<wd l="4459" t="14520" r="4728" b="14678">the</wd>

<space/>

<wd l="4891" t="14520" r="5784" b="14722">remaining</wd>

</ln>

</para>

<rulerline l="1416" t="15067" r="4301" b="15067" type="single" width="19" color="000000"/>

<para l="1426" t="15211" r="2698" b="15408" alignment="left" spaceBefore="149" lsp="exactly" lspExact="203" language="en">

<ln l="1426" t="15211" r="2698" b="15408" baseLine="15358">

<run underlined="none" subsuperscript="none" fontSize="650" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1426" t="15211" r="1483" b="15298">2</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1531" t="15240" r="2698" b="15408">http://aspell.net/</wd>

</run>

</ln>

</para>

</column>

<column l="6120" t="1417" r="10517" b="14827">

<para l="6125" t="1464" r="10498" b="6221" alignment="justified" spaceBefore="1" lsp="exactly" lspExact="252" language="en">

<ln l="6130" t="1464" r="10493" b="1666" baseLine="1613" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="1464" r="6715" b="1656">31,123</wd>

<space/>

<wd l="6830" t="1464" r="7301" b="1622">OOV</wd>

<space/>

<wd l="7392" t="1464" r="7978" b="1656">words,</wd>

<space/>

<wd l="8078" t="1512" r="8333" b="1622">we</wd>

<space/>

<wd l="8438" t="1464" r="9216" b="1666">analyzed</wd>

<space/>

<wd l="9317" t="1469" r="9850" b="1656">5,775,</wd>

<space/>

<wd l="9950" t="1464" r="10493" b="1622">which</wd>

<space/>

</ln>

<ln l="6130" t="1718" r="10488" b="1920" baseLine="1867" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="1718" r="7109" b="1920">correspond</wd>

<space/>

<wd l="7291" t="1738" r="7459" b="1877">to</wd>

<space/>

<wd l="7651" t="1718" r="8184" b="1877">words</wd>

<space/>

<wd l="8371" t="1718" r="8765" b="1877">with</wd>

<space/>

<wd l="8952" t="1766" r="9403" b="1877">more</wd>

<space/>

<wd l="9590" t="1718" r="9965" b="1877">than</wd>

<space/>

<wd l="10162" t="1738" r="10488" b="1877">two</wd>

<space/>

</ln>

<ln l="6130" t="1968" r="10488" b="2170" baseLine="2122" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="2016" r="7171" b="2126">occurrences</wd>

<space/>

<wd l="7229" t="1968" r="7397" b="2122">in</wd>

<space/>

<wd l="7445" t="1968" r="7714" b="2126">the</wd>

<space/>

<wd l="7766" t="2016" r="8395" b="2170">corpus.</wd>

<space/>

<wd l="8467" t="1968" r="8894" b="2126">Such</wd>

<space/>

<wd l="8952" t="1968" r="9422" b="2126">OOV</wd>

<space/>

<wd l="9475" t="1968" r="10008" b="2126">words</wd>

<space/>

<wd l="10066" t="2016" r="10488" b="2126">were</wd>

<space/>

</ln>

<ln l="6130" t="2222" r="10488" b="2414" baseLine="2376" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="2222" r="6960" b="2381">classified</wd>

<space/>

<wd l="7003" t="2222" r="7171" b="2376">in</wd>

<space/>

<wd l="7219" t="2270" r="7315" b="2381">a</wd>

<space/>

<wd l="7363" t="2222" r="8486" b="2381">double-blind</wd>

<space/>

<wd l="8530" t="2222" r="9456" b="2381">annotation</wd>

<space/>

<wd l="9499" t="2222" r="9898" b="2414">task,</wd>

<space/>

<wd l="9950" t="2222" r="10488" b="2381">which</wd>

<space/>

</ln>

<ln l="6130" t="2477" r="10488" b="2678" baseLine="2626" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="2477" r="6888" b="2635">obtained</wd>

<space/>

<wd l="7066" t="2477" r="7550" b="2635">0.752</wd>

<space/>

<wd l="7738" t="2477" r="7934" b="2635">of</wd>

<space/>

<wd l="8093" t="2477" r="9398" b="2635">inter-annotator</wd>

<space/>

<wd l="9576" t="2496" r="10488" b="2678">agreement</wd>

<space/>

</ln>

<ln l="6130" t="2726" r="10488" b="2928" baseLine="2880" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="2726" r="6778" b="2928">(Kappa</wd>

<space/>

<wd l="6888" t="2726" r="7685" b="2918">statistics,</wd>

<space/>

<wd l="7805" t="2726" r="8544" b="2918">Carletta,</wd>

<space/>

<wd l="8678" t="2726" r="9211" b="2928">1996).</wd>

<space/>

<wd l="9336" t="2726" r="9672" b="2885">The</wd>

<space/>

<wd l="9787" t="2726" r="10488" b="2928">analysis</wd>

<space/>

</ln>

<ln l="6134" t="2981" r="10488" b="3182" baseLine="3134" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6134" t="2981" r="6802" b="3139">showed</wd>

<space/>

<wd l="6994" t="2981" r="7325" b="3139">that</wd>

<space/>

<wd l="7531" t="2981" r="7925" b="3139">such</wd>

<space/>

<wd l="8126" t="2981" r="8592" b="3139">OOV</wd>

<space/>

<wd l="8794" t="2981" r="9326" b="3139">words</wd>

<space/>

<wd l="9528" t="3029" r="10488" b="3182">encompass</wd>

<space/>

</ln>

<ln l="6125" t="3235" r="10483" b="3437" baseLine="3384" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6125" t="3235" r="7272" b="3437">misspellings,</wd>

<space/>

<wd l="7330" t="3235" r="7920" b="3394">named</wd>

<space/>

<wd l="7973" t="3235" r="8597" b="3394">entities</wd>

<space/>

<wd l="8654" t="3235" r="9278" b="3394">written</wd>

<space/>

<wd l="9336" t="3235" r="9504" b="3389">in</wd>

<space/>

<wd l="9557" t="3235" r="10483" b="3427">lowercase,</wd>

<space/>

</ln>

<ln l="6130" t="3490" r="10498" b="3691" baseLine="3638" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="3490" r="6758" b="3691">foreign</wd>

<space/>

<wd l="6874" t="3490" r="7243" b="3648">loan</wd>

<space/>

<wd l="7358" t="3490" r="7891" b="3648">words</wd>

<space/>

<wd l="8011" t="3490" r="8328" b="3648">and</wd>

<space/>

<wd l="8434" t="3509" r="9230" b="3648">recurrent</wd>

<space/>

<wd l="9341" t="3490" r="10498" b="3648">non-standard</wd>

<space/>

</ln>

<ln l="6125" t="3739" r="10483" b="3941" baseLine="3893" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6125" t="3739" r="6658" b="3898">words</wd>

<space/>

<wd l="6787" t="3739" r="6955" b="3893">in</wd>

<space/>

<wd l="7070" t="3739" r="7531" b="3898">UGC</wd>

<space/>

<wd l="7661" t="3739" r="8419" b="3941">(Internet</wd>

<space/>

<wd l="8544" t="3739" r="9120" b="3941">slang),</wd>

<space/>

<wd l="9250" t="3739" r="9504" b="3898">for</wd>

<space/>

<wd l="9624" t="3739" r="10162" b="3898">which</wd>

<space/>

<wd l="10286" t="3787" r="10483" b="3898">an</wd>

<space/>

</ln>

<ln l="6130" t="3994" r="10488" b="4195" baseLine="4142" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="3994" r="7042" b="4195">equivalent</wd>

<space/>

<wd l="7138" t="3994" r="7632" b="4152">exists</wd>

<space/>

<wd l="7733" t="3994" r="7901" b="4147">in</wd>

<space/>

<wd l="7997" t="3994" r="8266" b="4152">the</wd>

<space/>

<wd l="8371" t="3994" r="9115" b="4152">standard</wd>

<space/>

<wd l="9211" t="3994" r="10042" b="4195">language.</wd>

<space/>

<wd l="10152" t="3994" r="10488" b="4152">The</wd>

<space/>

</ln>

<ln l="6125" t="4248" r="10483" b="4450" baseLine="4397" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6125" t="4248" r="7344" b="4406">normalization</wd>

<space/>

<wd l="7416" t="4248" r="7613" b="4406">of</wd>

<space/>

<wd l="7661" t="4248" r="8131" b="4406">OOV</wd>

<space/>

<wd l="8198" t="4248" r="8784" b="4440">words,</wd>

<space/>

<wd l="8856" t="4248" r="9696" b="4440">therefore,</wd>

<space/>

<wd l="9768" t="4248" r="10483" b="4450">depends</wd>

<space/>

</ln>

<ln l="6130" t="4498" r="10488" b="4699" baseLine="4651" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="4546" r="6346" b="4656">on</wd>

<space/>

<wd l="6403" t="4498" r="7646" b="4699">distinguishing</wd>

<space/>

<wd l="7694" t="4498" r="8146" b="4656">these</wd>

<space/>

<wd l="8208" t="4498" r="9144" b="4699">categories,</wd>

<space/>

<wd l="9206" t="4546" r="9379" b="4656">as</wd>

<space/>

<wd l="9437" t="4498" r="9816" b="4699">they</wd>

<space/>

<wd l="9869" t="4498" r="10488" b="4699">require</wd>

<space/>

</ln>

<ln l="6130" t="4752" r="10488" b="4954" baseLine="4906" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="4752" r="6883" b="4910">different</wd>

<space/>

<wd l="7162" t="4752" r="8174" b="4954">procedures:</wd>

<space/>

<wd l="8482" t="4752" r="9576" b="4954">misspellings</wd>

<space/>

<wd l="9864" t="4752" r="10488" b="4954">require</wd>

<space/>

</ln>

<ln l="6134" t="5006" r="10488" b="5208" baseLine="5155" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6134" t="5006" r="6821" b="5208">spelling</wd>

<space/>

<wd l="7042" t="5006" r="7978" b="5198">correction,</wd>

<space/>

<wd l="8203" t="5006" r="8794" b="5165">named</wd>

<space/>

<wd l="9010" t="5006" r="9643" b="5165">entities</wd>

<space/>

<wd l="9864" t="5006" r="10488" b="5208">require</wd>

<space/>

</ln>

<ln l="6130" t="5261" r="10493" b="5462" baseLine="5410" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="5261" r="7090" b="5419">conversion</wd>

<space/>

<wd l="7142" t="5280" r="7310" b="5419">to</wd>

<space/>

<wd l="7373" t="5309" r="8294" b="5462">uppercase,</wd>

<space/>

<wd l="8362" t="5261" r="8990" b="5462">foreign</wd>

<space/>

<wd l="9053" t="5261" r="9427" b="5419">loan</wd>

<space/>

<wd l="9480" t="5261" r="10013" b="5419">words</wd>

<space/>

<wd l="10070" t="5261" r="10493" b="5419">need</wd>

<space/>

</ln>

<ln l="6125" t="5510" r="10483" b="5712" baseLine="5664" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6125" t="5530" r="6293" b="5669">to</wd>

<space/>

<wd l="6437" t="5510" r="6648" b="5669">be</wd>

<space/>

<wd l="6797" t="5510" r="7906" b="5712">incorporated</wd>

<space/>

<wd l="8045" t="5530" r="8213" b="5669">to</wd>

<space/>

<wd l="8362" t="5510" r="8626" b="5669">the</wd>

<space/>

<wd l="8779" t="5510" r="9470" b="5702">lexicon,</wd>

<space/>

<wd l="9629" t="5510" r="9946" b="5669">and</wd>

<space/>

<wd l="10085" t="5558" r="10483" b="5669">non-</wd>

</ln>

<ln l="6134" t="5765" r="10488" b="5966" baseLine="5914" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6134" t="5765" r="6878" b="5923">standard</wd>

<space/>

<wd l="6998" t="5765" r="7531" b="5923">words</wd>

<space/>

<wd l="7661" t="5765" r="8285" b="5966">require</wd>

<space/>

<wd l="8419" t="5765" r="9437" b="5923">substitution</wd>

<space/>

<wd l="9571" t="5765" r="9826" b="5923">for</wd>

<space/>

<wd l="9950" t="5765" r="10488" b="5923">words</wd>

<space/>

</ln>

<ln l="6130" t="6019" r="8568" b="6221" baseLine="6168" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="6019" r="6557" b="6178">from</wd>

<space/>

<wd l="6605" t="6019" r="6874" b="6178">the</wd>

<space/>

<wd l="6941" t="6019" r="7685" b="6178">standard</wd>

<space/>

<wd l="7738" t="6019" r="8568" b="6221">language.</wd>

</ln>

</para>

<para l="6120" t="6269" r="10493" b="9758" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="6350" t="6269" r="10483" b="6470" baseLine="6422" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6350" t="6269" r="6619" b="6422">An</wd>

<space/>

<wd l="6797" t="6269" r="7526" b="6470">in-depth</wd>

<space/>

<wd l="7699" t="6269" r="8400" b="6470">analysis</wd>

<space/>

<wd l="8573" t="6269" r="8774" b="6427">of</wd>

<space/>

<wd l="8923" t="6269" r="9192" b="6427">the</wd>

<space/>

<wd l="9384" t="6269" r="9840" b="6461">1,323</wd>

<space/>

<wd l="10032" t="6317" r="10483" b="6427">cases</wd>

<space/>

</ln>

<ln l="6130" t="6523" r="10483" b="6725" baseLine="6677" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="6523" r="6960" b="6682">classified</wd>

<space/>

<wd l="7104" t="6571" r="7277" b="6682">as</wd>

<space/>

<wd l="7426" t="6523" r="8520" b="6725">misspellings</wd>

<space/>

<wd l="8664" t="6523" r="8890" b="6725">by</wd>

<space/>

<wd l="9029" t="6523" r="9427" b="6682">both</wd>

<space/>

<wd l="9576" t="6542" r="10483" b="6682">annotators</wd>

<space/>

</ln>

<ln l="6130" t="6778" r="10488" b="6979" baseLine="6926" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="6778" r="6706" b="6979">(100%</wd>

<space/>

<wd l="6763" t="6778" r="6960" b="6936">of</wd>

<space/>

<wd l="6989" t="6778" r="8294" b="6936">inter-annotator</wd>

<space/>

<wd l="8342" t="6778" r="9322" b="6979">agreement)</wd>

<space/>

<wd l="9370" t="6778" r="10118" b="6936">revealed</wd>

<space/>

<wd l="10157" t="6778" r="10488" b="6936">that</wd>

<space/>

</ln>

<ln l="6130" t="7027" r="10493" b="7229" baseLine="7181" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="7027" r="6432" b="7186">791</wd>

<space/>

<wd l="6509" t="7075" r="6936" b="7186">were</wd>

<space/>

<wd l="6994" t="7046" r="7517" b="7229">typos,</wd>

<space/>

<wd l="7579" t="7027" r="7886" b="7186">451</wd>

<space/>

<wd l="7963" t="7075" r="8390" b="7186">were</wd>

<space/>

<wd l="8443" t="7027" r="10493" b="7229">phonetically-motivated</wd>

<space/>

</ln>

<ln l="6130" t="7282" r="10483" b="7474" baseLine="7435" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="7330" r="6686" b="7474">errors,</wd>

<space/>

<wd l="6797" t="7282" r="6998" b="7440">64</wd>

<space/>

<wd l="7099" t="7330" r="7526" b="7440">were</wd>

<space/>

<wd l="7622" t="7282" r="8347" b="7440">misused</wd>

<space/>

<wd l="8438" t="7282" r="9235" b="7440">diacritics</wd>

<space/>

<wd l="9341" t="7282" r="9658" b="7440">and</wd>

<space/>

<wd l="9773" t="7282" r="9960" b="7435">14</wd>

<space/>

<wd l="10061" t="7330" r="10483" b="7440">were</wd>

<space/>

</ln>

<ln l="6120" t="7536" r="10488" b="7738" baseLine="7685" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6120" t="7536" r="6936" b="7738">problems</wd>

<space/>

<wd l="7142" t="7536" r="7742" b="7694">related</wd>

<space/>

<wd l="7939" t="7555" r="8102" b="7694">to</wd>

<space/>

<wd l="8309" t="7536" r="8573" b="7694">the</wd>

<space/>

<wd l="8774" t="7555" r="9312" b="7694">recent</wd>

<space/>

<wd l="9509" t="7541" r="10488" b="7738">Portuguese</wd>

<space/>

</ln>

<ln l="6130" t="7790" r="10483" b="7992" baseLine="7939" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="7790" r="7402" b="7992">orthographical</wd>

<space/>

<wd l="7493" t="7790" r="7968" b="7982">rules,</wd>

<space/>

<wd l="8064" t="7790" r="8659" b="7992">mostly</wd>

<space/>

<wd l="8750" t="7790" r="9658" b="7949">associated</wd>

<space/>

<wd l="9739" t="7790" r="10128" b="7949">with</wd>

<space/>

<wd l="10214" t="7790" r="10483" b="7949">the</wd>

<space/>

</ln>

<ln l="6125" t="8040" r="10488" b="8242" baseLine="8194" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6125" t="8088" r="6418" b="8198">use</wd>

<space/>

<wd l="6538" t="8040" r="6739" b="8198">of</wd>

<space/>

<wd l="6826" t="8040" r="7474" b="8242">hyphen</wd>

<space/>

<wd l="7594" t="8040" r="7762" b="8194">in</wd>

<space/>

<wd l="7877" t="8040" r="8928" b="8242">compounds.</wd>

<space/>

<wd l="9053" t="8040" r="9293" b="8198">As</wd>

<space/>

<wd l="9418" t="8088" r="10488" b="8242">open-source</wd>

<space/>

</ln>

<ln l="6125" t="8294" r="10483" b="8496" baseLine="8443" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6125" t="8299" r="7099" b="8496">Portuguese</wd>

<space/>

<wd l="7219" t="8294" r="7877" b="8496">spellers</wd>

<space/>

<wd l="7987" t="8294" r="8198" b="8453">do</wd>

<space/>

<wd l="8309" t="8314" r="8592" b="8453">not</wd>

<space/>

<wd l="8698" t="8294" r="9221" b="8453">tackle</wd>

<space/>

<wd l="9326" t="8294" r="10483" b="8496">phonetically-</wd>

</ln>

<ln l="6125" t="8549" r="10488" b="8750" baseLine="8698" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6125" t="8549" r="7008" b="8707">motivated</wd>

<space/>

<wd l="7229" t="8549" r="8381" b="8750">misspellings,</wd>

<space/>

<wd l="8611" t="8597" r="8866" b="8707">we</wd>

<space/>

<wd l="9096" t="8549" r="9998" b="8707">undertook</wd>

<space/>

<wd l="10219" t="8549" r="10488" b="8707">the</wd>

<space/>

</ln>

<ln l="6130" t="8798" r="10488" b="9000" baseLine="8952" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="8798" r="7262" b="9000">development</wd>

<space/>

<wd l="7320" t="8798" r="7522" b="8957">of</wd>

<space/>

<wd l="7560" t="8846" r="7656" b="8957">a</wd>

<space/>

<wd l="7699" t="8798" r="9043" b="9000">phonetic-based</wd>

<space/>

<wd l="9101" t="8798" r="9682" b="9000">speller</wd>

<space/>

<wd l="9739" t="8798" r="10488" b="9000">(Avanço</wd>

<space/>

</ln>

<ln l="6130" t="9053" r="10493" b="9254" baseLine="9206" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="9072" r="6283" b="9211">et</wd>

<space/>

<wd l="6365" t="9053" r="6619" b="9245">al.,</wd>

<space/>

<wd l="6706" t="9053" r="7267" b="9254">2014),</wd>

<space/>

<wd l="7349" t="9053" r="7882" b="9211">which</wd>

<space/>

<wd l="7963" t="9053" r="8746" b="9211">achieved</wd>

<space/>

<wd l="8822" t="9053" r="9490" b="9216">65.46%</wd>

<space/>

<wd l="9576" t="9053" r="9778" b="9211">of</wd>

<space/>

<wd l="9835" t="9053" r="10186" b="9211">first</wd>

<space/>

<wd l="10258" t="9053" r="10493" b="9211">hit</wd>

<space/>

</ln>

<ln l="6130" t="9307" r="10488" b="9509" baseLine="9456" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="9355" r="6950" b="9509">accuracy,</wd>

<space/>

<wd l="7104" t="9307" r="7723" b="9509">against</wd>

<space/>

<wd l="7862" t="9307" r="8539" b="9470">46.94%</wd>

<space/>

<wd l="8688" t="9307" r="8890" b="9466">of</wd>

<space/>

<wd l="9005" t="9307" r="9274" b="9466">the</wd>

<space/>

<wd l="9418" t="9355" r="10488" b="9509">open-source</wd>

<space/>

</ln>

<ln l="6134" t="9538" r="7454" b="9758" baseLine="9705">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6134" t="9557" r="6715" b="9758">speller</wd>

<space/>

</run>

<wd l="6768" t="9538" r="7454" b="9758"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">Aspell</run>

<run underlined="none" subsuperscript="superscript" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">2</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><nl orig="true"/>

</run>

</ln>

</para>

<para l="6120" t="9811" r="10512" b="14813" alignment="justified" spaceBefore="6" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="6350" t="9811" r="10493" b="10013" baseLine="9965" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6350" t="9811" r="7003" b="9970">Further</wd>

<space/>

<wd l="7075" t="9811" r="7776" b="10013">analysis</wd>

<space/>

<wd l="7858" t="9811" r="8059" b="9970">of</wd>

<space/>

<wd l="8107" t="9811" r="8376" b="9970">the</wd>

<space/>

<wd l="8458" t="9859" r="9034" b="10013">corpus</wd>

<space/>

<wd l="9115" t="9811" r="9384" b="9970">led</wd>

<space/>

<wd l="9451" t="9859" r="9648" b="9970">us</wd>

<space/>

<wd l="9720" t="9830" r="9888" b="9970">to</wd>

<space/>

<wd l="9965" t="9811" r="10493" b="10013">verify</wd>

<space/>

</ln>

<ln l="6125" t="10066" r="10483" b="10267" baseLine="10214" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="10066" r="6456" b="10224">that</wd>

<space/>

<wd l="6523" t="10114" r="7008" b="10267">many</wd>

<space/>

<wd l="7075" t="10066" r="7608" b="10224">words</wd>

<space/>

<wd l="7680" t="10066" r="8011" b="10224">that</wd>

<space/>

<wd l="8078" t="10066" r="8702" b="10267">require</wd>

<space/>

<wd l="8770" t="10066" r="9994" b="10224">normalization</wd>

<space/>

<wd l="10061" t="10114" r="10483" b="10224">were</wd>

<space/>

</ln>

<ln l="6125" t="10320" r="10493" b="10522" baseLine="10469" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="10339" r="6408" b="10478">not</wd>

<space/>

<wd l="6624" t="10320" r="7382" b="10478">included</wd>

<space/>

<wd l="7598" t="10368" r="8189" b="10522">among</wd>

<space/>

<wd l="8405" t="10320" r="8674" b="10478">the</wd>

<space/>

<wd l="8894" t="10320" r="9365" b="10478">OOV</wd>

<space/>

<wd l="9586" t="10320" r="10171" b="10512">words,</wd>

<space/>

<wd l="10397" t="10368" r="10493" b="10478">a</wd>

<space/>

</ln>

<ln l="6120" t="10570" r="10483" b="10771" baseLine="10723">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="6120" t="10570" r="7262" b="10771">phenomenon</wd>

<space/>

<wd l="7392" t="10570" r="7987" b="10728">known</wd>

<space/>

<wd l="8122" t="10618" r="8294" b="10728">as</wd>

<space/>

</run>

<wd l="8434" t="10570" r="9384" b="10728"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">“real</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">-</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">word</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="9514" t="10570" r="10162" b="10728">errors”.</wd>

<space/>

<wd l="10310" t="10574" r="10483" b="10723">In</wd>

<space/>

</run>

</ln>

<ln l="6125" t="10824" r="10488" b="11026" baseLine="10978" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="10829" r="7099" b="11026">Portuguese</wd>

<space/>

<wd l="7147" t="10824" r="7584" b="10982">there</wd>

<space/>

<wd l="7637" t="10872" r="7901" b="10982">are</wd>

<space/>

<wd l="7949" t="10824" r="8558" b="10982">around</wd>

<space/>

<wd l="8606" t="10824" r="9202" b="11016">25,000</wd>

<space/>

<wd l="9245" t="10824" r="9677" b="11026">pairs</wd>

<space/>

<wd l="9730" t="10824" r="9931" b="10982">of</wd>

<space/>

<wd l="9955" t="10824" r="10488" b="10982">words</wd>

<space/>

</ln>

<ln l="6125" t="11078" r="10483" b="11280" baseLine="11227" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="11078" r="6456" b="11237">that</wd>

<space/>

<wd l="6528" t="11126" r="6792" b="11237">are</wd>

<space/>

<wd l="6864" t="11078" r="8040" b="11280">distinguished</wd>

<space/>

<wd l="8107" t="11078" r="8496" b="11280">only</wd>

<space/>

<wd l="8558" t="11078" r="8784" b="11280">by</wd>

<space/>

<wd l="8856" t="11078" r="9653" b="11237">diacritics</wd>

<space/>

<wd l="9730" t="11078" r="10094" b="11270">and,</wd>

<space/>

<wd l="10171" t="11078" r="10483" b="11237">due</wd>

<space/>

</ln>

<ln l="6125" t="11328" r="10478" b="11530" baseLine="11482" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="11347" r="6293" b="11486">to</wd>

<space/>

<wd l="6384" t="11328" r="6653" b="11486">the</wd>

<space/>

<wd l="6749" t="11328" r="7666" b="11530">systematic</wd>

<space/>

<wd l="7766" t="11328" r="8453" b="11486">absence</wd>

<space/>

<wd l="8549" t="11328" r="8750" b="11486">of</wd>

<space/>

<wd l="8818" t="11328" r="9614" b="11486">diacritics</wd>

<space/>

<wd l="9715" t="11328" r="9878" b="11482">in</wd>

<space/>

<wd l="9970" t="11328" r="10478" b="11520">UGC,</wd>

<space/>

</ln>

<ln l="6134" t="11582" r="10483" b="11784" baseLine="11736" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6134" t="11582" r="6528" b="11741">such</wd>

<space/>

<wd l="6662" t="11582" r="7090" b="11784">pairs</wd>

<space/>

<wd l="7238" t="11582" r="7440" b="11741">of</wd>

<space/>

<wd l="7555" t="11582" r="8088" b="11741">words</wd>

<space/>

<wd l="8232" t="11582" r="8846" b="11741">remain</wd>

<space/>

<wd l="8990" t="11582" r="10483" b="11784">indistinguishable</wd>

<space/>

</ln>

<ln l="6125" t="11837" r="10488" b="12029" baseLine="11986" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="11837" r="6797" b="11995">without</wd>

<space/>

<wd l="7109" t="11837" r="8016" b="11995">contextual</wd>

<space/>

<wd l="8333" t="11837" r="9418" b="12029">information,</wd>

<space/>

<wd l="9734" t="11885" r="9907" b="11995">as</wd>

<space/>

<wd l="10219" t="11837" r="10488" b="11995">the</wd>

<space/>

</ln>

<ln l="6125" t="12091" r="10493" b="12293" baseLine="12240">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="6125" t="12091" r="7205" b="12293">homographs</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="7277" t="12091" r="7594" b="12293">(eg:</wd>

<space/>

<wd l="7675" t="12091" r="8390" b="12250">“varias”</wd>

<space/>

<wd l="8453" t="12091" r="8813" b="12293">(=to</wd>

<space/>

<wd l="8875" t="12139" r="9264" b="12293">vary</wd>

<space/>

<wd l="9326" t="12091" r="9494" b="12245">in</wd>

<space/>

<wd l="9552" t="12091" r="9816" b="12250">the</wd>

<space/>

<wd l="9888" t="12091" r="10493" b="12250">second</wd>

<space/>

</run>

</ln>

<ln l="6120" t="12341" r="10488" b="12542" baseLine="12494">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="6120" t="12389" r="6710" b="12542">person</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="6782" t="12341" r="7483" b="12542">singular</wd>

<space/>

<wd l="7550" t="12341" r="7714" b="12494">in</wd>

<space/>

<wd l="7776" t="12341" r="8045" b="12499">the</wd>

<space/>

<wd l="8102" t="12360" r="8741" b="12542">present</wd>

<space/>

<wd l="8798" t="12341" r="9322" b="12542">tense)</wd>

<space/>

<wd l="9394" t="12341" r="9710" b="12499">and</wd>

<space/>

<wd l="9773" t="12341" r="10488" b="12499">“várias”</wd>

<space/>

</run>

</ln>

<ln l="6130" t="12595" r="10483" b="12797" baseLine="12744" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="12595" r="7138" b="12797">(=several)).</wd>

<space/>

<wd l="7416" t="12595" r="7925" b="12754">There</wd>

<space/>

<wd l="8203" t="12643" r="8462" b="12754">are</wd>

<space/>

<wd l="8736" t="12595" r="9082" b="12754">also</wd>

<space/>

<wd l="9365" t="12643" r="9811" b="12754">some</wd>

<space/>

<wd l="10085" t="12643" r="10483" b="12754">non-</wd>

</ln>

<ln l="6130" t="12850" r="10483" b="13051" baseLine="12998">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="6130" t="12850" r="7258" b="13008">conventional</wd>

<space/>

<wd l="7330" t="12850" r="7862" b="13008">words</wd>

<space/>

<wd l="7939" t="12850" r="8366" b="13008">from</wd>

<space/>

</run>

<wd l="8438" t="12854" r="9120" b="13008"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">Int</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">ernet</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="9197" t="12850" r="9653" b="13051">slang</wd>

<space/>

<wd l="9725" t="12850" r="10042" b="13051">(eg.</wd>

<space/>

<wd l="10128" t="12850" r="10483" b="13008">“vai</wd>

<space/>

</run>

</ln>

<ln l="6125" t="13099" r="10483" b="13301" baseLine="13253" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="13099" r="7114" b="13258">testa”=“vai</wd>

<space/>

<wd l="7186" t="13099" r="8218" b="13258">testar”=will</wd>

<space/>

<wd l="8290" t="13099" r="8736" b="13301">test))</wd>

<space/>

<wd l="8818" t="13099" r="9134" b="13258">and</wd>

<space/>

<wd l="9197" t="13099" r="9787" b="13258">named</wd>

<space/>

<wd l="9854" t="13099" r="10483" b="13258">entities</wd>

<space/>

</ln>

<ln l="6130" t="13354" r="10488" b="13555" baseLine="13507" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="13354" r="6446" b="13555">(eg.</wd>

<space/>

<wd l="6542" t="13354" r="6811" b="13512">the</wd>

<space/>

<wd l="6898" t="13354" r="7829" b="13555">companies</wd>

<space/>

<wd l="7925" t="13354" r="8189" b="13546">Oi,</wd>

<space/>

<wd l="8285" t="13354" r="8760" b="13512">Claro</wd>

<space/>

<wd l="8851" t="13354" r="9173" b="13512">and</wd>

<space/>

<wd l="9259" t="13354" r="9859" b="13555">Sadia),</wd>

<space/>

<wd l="9950" t="13354" r="10488" b="13512">which</wd>

<space/>

</ln>

<ln l="6125" t="13608" r="10478" b="13810" baseLine="13757">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="6125" t="13608" r="6658" b="13766">match</wd>

<space/>

<wd l="6725" t="13608" r="7421" b="13810">existing</wd>

<space/>

<wd l="7478" t="13608" r="8011" b="13766">words</wd>

<space/>

</run>

<wd l="8083" t="13608" r="9691" b="13810"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">(</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">“testa”=</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">forehead;</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="9773" t="13608" r="10478" b="13800">“oi”=hi;</wd>

<space/>

</run>

</ln>

<ln l="6130" t="13858" r="10478" b="14059" baseLine="14011" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="13858" r="7334" b="14059">“claro”=light,</wd>

<space/>

<wd l="7406" t="13858" r="7877" b="14050">clear;</wd>

<space/>

<wd l="7958" t="13858" r="9490" b="14059">“sadia”=healthy).</wd>

<space/>

<wd l="9571" t="13858" r="10478" b="14050">Therefore,</wd>

<space/>

</ln>

<ln l="6130" t="14112" r="10488" b="14270" baseLine="14266" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="14112" r="6283" b="14266">if</wd>

<space/>

<wd l="6365" t="14112" r="6758" b="14270">such</wd>

<space/>

<wd l="6854" t="14112" r="7392" b="14270">words</wd>

<space/>

<wd l="7498" t="14160" r="7757" b="14270">are</wd>

<space/>

<wd l="7862" t="14112" r="8606" b="14270">identical</wd>

<space/>

<wd l="8707" t="14131" r="8875" b="14270">to</wd>

<space/>

<wd l="8981" t="14112" r="9432" b="14270">other</wd>

<space/>

<wd l="9523" t="14112" r="10056" b="14270">words</wd>

<space/>

<wd l="10157" t="14112" r="10488" b="14270">that</wd>

<space/>

</ln>

<ln l="6120" t="14366" r="10483" b="14568" baseLine="14515" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6120" t="14366" r="6725" b="14568">belong</wd>

<space/>

<wd l="6806" t="14386" r="6974" b="14525">to</wd>

<space/>

<wd l="7061" t="14366" r="7330" b="14525">the</wd>

<space/>

<wd l="7416" t="14366" r="8107" b="14558">lexicon,</wd>

<space/>

<wd l="8198" t="14366" r="8578" b="14568">they</wd>

<space/>

<wd l="8664" t="14414" r="8928" b="14525">are</wd>

<space/>

<wd l="9014" t="14386" r="9298" b="14525">not</wd>

<space/>

<wd l="9384" t="14366" r="10224" b="14525">identified</wd>

<space/>

<wd l="10306" t="14414" r="10483" b="14525">as</wd>

<space/>

</ln>

<ln l="6130" t="14621" r="10512" b="14813" baseLine="14770" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="14621" r="6600" b="14779">OOV</wd>

<space/>

<wd l="6662" t="14621" r="7243" b="14779">words.</wd>

<space/>

<wd l="7315" t="14626" r="7622" b="14779">For</wd>

<space/>

<wd l="7680" t="14621" r="7992" b="14779">this</wd>

<space/>

<wd l="8054" t="14669" r="8674" b="14813">reason,</wd>

<space/>

<wd l="8741" t="14621" r="9010" b="14779">the</wd>

<space/>

<wd l="9077" t="14621" r="10243" b="14779">identification</wd>

<space/>

<wd l="10310" t="14621" r="10512" b="14779">of</wd>

</ln>

</para>

</column>

</section>

<dd l="1416" t="15736" r="10517" b="15977">

<para l="5800" t="15792" r="6148" b="15946" alignment="centered" spaceBefore="4" lsp="exactly" lspExact="229" language="en">

<ln l="5866" t="15792" r="6082" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="43">

<wd l="5866" t="15792" r="6082" b="15946">40</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4305.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1402" marginTop="1417" marginRight="1385" marginBottom="1292" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1402" t="1417" r="10524" b="15235">

<column l="1402" t="1417" r="5813" b="15235">

<para l="1421" t="1464" r="5784" b="1920" alignment="justified" spaceBefore="1" lsp="exactly" lspExact="253" language="en">

<ln l="1421" t="1464" r="5784" b="1666" baseLine="1613" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="1464" r="1987" b="1622">tokens</wd>

<space/>

<wd l="2035" t="1464" r="2366" b="1622">that</wd>

<space/>

<wd l="2410" t="1464" r="3029" b="1666">require</wd>

<space/>

<wd l="3077" t="1464" r="4296" b="1622">normalization</wd>

<space/>

<wd l="4344" t="1464" r="4483" b="1622">is</wd>

<space/>

<wd l="4531" t="1512" r="4978" b="1622">more</wd>

<space/>

<wd l="5030" t="1464" r="5784" b="1666">complex</wd>

<space/>

</ln>

<ln l="1426" t="1718" r="4786" b="1920" baseLine="1867" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="1718" r="1589" b="1872">in</wd>

<space/>

<wd l="1646" t="1718" r="2107" b="1877">UGC</wd>

<space/>

<wd l="2165" t="1718" r="2539" b="1877">than</wd>

<space/>

<wd l="2602" t="1718" r="2770" b="1872">in</wd>

<space/>

<wd l="2822" t="1718" r="3091" b="1877">the</wd>

<space/>

<wd l="3158" t="1718" r="3898" b="1877">standard</wd>

<space/>

<wd l="3950" t="1718" r="4786" b="1920">language.</wd>

</ln>

</para>

<para l="1416" t="1968" r="5808" b="8750" alignment="justified" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="1651" t="1968" r="5784" b="2126" baseLine="2122" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1651" t="1968" r="1987" b="2126">The</wd>

<space/>

<wd l="2117" t="1968" r="3470" b="2126">unconventional</wd>

<space/>

<wd l="3605" t="2016" r="3898" b="2126">use</wd>

<space/>

<wd l="4032" t="1968" r="4234" b="2126">of</wd>

<space/>

<wd l="4344" t="2016" r="4714" b="2126">case</wd>

<space/>

<wd l="4853" t="1968" r="4992" b="2126">is</wd>

<space/>

<wd l="5126" t="1968" r="5784" b="2126">another</wd>

<space/>

</ln>

<ln l="1426" t="2222" r="5784" b="2424" baseLine="2376" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="2222" r="2587" b="2381">characteristic</wd>

<space/>

<wd l="2779" t="2222" r="2981" b="2381">of</wd>

<space/>

<wd l="3139" t="2222" r="3600" b="2381">UGC</wd>

<space/>

<wd l="3787" t="2222" r="4579" b="2381">observed</wd>

<space/>

<wd l="4762" t="2222" r="4930" b="2376">in</wd>

<space/>

<wd l="5102" t="2222" r="5784" b="2424">product</wd>

<space/>

</ln>

<ln l="1421" t="2477" r="5789" b="2678" baseLine="2626" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="2477" r="2146" b="2635">reviews.</wd>

<space/>

<wd l="2251" t="2477" r="3250" b="2678">Frequently,</wd>

<space/>

<wd l="3350" t="2477" r="3926" b="2678">capital</wd>

<space/>

<wd l="4027" t="2477" r="4555" b="2635">letters</wd>

<space/>

<wd l="4651" t="2525" r="4915" b="2635">are</wd>

<space/>

<wd l="5006" t="2496" r="5290" b="2635">not</wd>

<space/>

<wd l="5381" t="2477" r="5789" b="2635">used</wd>

<space/>

</ln>

<ln l="1426" t="2726" r="5774" b="2928" baseLine="2880" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="2726" r="1824" b="2885">after</wd>

<space/>

<wd l="1925" t="2726" r="2971" b="2928">punctuation</wd>

<space/>

<wd l="3086" t="2774" r="3259" b="2885">as</wd>

<space/>

<wd l="3374" t="2726" r="3749" b="2885">well</wd>

<space/>

<wd l="3864" t="2774" r="4037" b="2885">as</wd>

<space/>

<wd l="4157" t="2726" r="4411" b="2885">for</wd>

<space/>

<wd l="4512" t="2774" r="5093" b="2928">proper</wd>

<space/>

<wd l="5198" t="2774" r="5774" b="2885">nouns.</wd>

<space/>

</ln>

<ln l="1426" t="2981" r="5774" b="3182" baseLine="3134" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="2981" r="2467" b="3182">Conversely,</wd>

<space/>

<wd l="2539" t="3029" r="3302" b="3139">common</wd>

<space/>

<wd l="3360" t="2981" r="3893" b="3139">words</wd>

<space/>

<wd l="3960" t="3029" r="4224" b="3139">are</wd>

<space/>

<wd l="4286" t="2981" r="4906" b="3139">written</wd>

<space/>

<wd l="4973" t="2981" r="5141" b="3134">in</wd>

<space/>

<wd l="5198" t="2981" r="5774" b="3182">capital</wd>

<space/>

</ln>

<ln l="1426" t="3235" r="5779" b="3437" baseLine="3384">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1426" t="3235" r="1954" b="3394">letters</wd>

<space/>

<wd l="2074" t="3254" r="2237" b="3394">to</wd>

<space/>

<wd l="2362" t="3235" r="3283" b="3437">emphasize</wd>

<space/>

<wd l="3408" t="3283" r="3605" b="3394">an</wd>

<space/>

<wd l="3730" t="3235" r="4402" b="3437">opinion</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="4522" t="3235" r="4838" b="3437">(eg.</wd>

<space/>

<wd l="4973" t="3235" r="5779" b="3394">“MUITO</wd>

<space/>

</run>

</ln>

<ln l="1421" t="3490" r="5779" b="3691" baseLine="3638" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="3490" r="2016" b="3648">BOM”</wd>

<space/>

<wd l="2102" t="3547" r="2227" b="3595">=</wd>

<space/>

<wd l="2314" t="3494" r="2914" b="3648">VERY</wd>

<space/>

<wd l="3005" t="3490" r="3754" b="3691">GOOD).</wd>

<space/>

<wd l="3854" t="3490" r="4358" b="3648">There</wd>

<space/>

<wd l="4450" t="3538" r="4714" b="3648">are</wd>

<space/>

<wd l="4805" t="3490" r="5150" b="3648">also</wd>

<space/>

<wd l="5242" t="3490" r="5779" b="3648">whole</wd>

<space/>

</ln>

<ln l="1421" t="3739" r="5789" b="3941" baseLine="3893" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="3739" r="2102" b="3898">reviews</wd>

<space/>

<wd l="2189" t="3739" r="2813" b="3898">written</wd>

<space/>

<wd l="2899" t="3739" r="3067" b="3893">in</wd>

<space/>

<wd l="3149" t="3787" r="4027" b="3941">uppercase</wd>

<space/>

<wd l="4118" t="3787" r="4301" b="3898">or</wd>

<space/>

<wd l="4387" t="3739" r="4555" b="3893">in</wd>

<space/>

<wd l="4646" t="3739" r="5515" b="3898">lowercase</wd>

<space/>

<wd l="5606" t="3787" r="5789" b="3898">or</wd>

<space/>

</ln>

<ln l="1426" t="3994" r="5779" b="4186" baseLine="4142">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1426" t="4042" r="1834" b="4152">even</wd>

<space/>

<wd l="1901" t="4042" r="1997" b="4152">a</wd>

<space/>

<wd l="2054" t="3994" r="2395" b="4147">mix</wd>

<space/>

</run>

<wd l="2462" t="4042" r="2688" b="4152"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">as</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">:</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="2770" t="3994" r="3144" b="4147">“Fiz</wd>

<space/>

<wd l="3211" t="3994" r="3898" b="4152">Contato</wd>

<space/>

<wd l="3970" t="4042" r="4349" b="4152">com</wd>

<space/>

<wd l="4411" t="4042" r="4512" b="4152">o</wd>

<space/>

<wd l="4579" t="3994" r="5496" b="4186">Vendedor,</wd>

<space/>

<wd l="5563" t="4042" r="5779" b="4152">no</wd>

<space/>

</run>

</ln>

<ln l="1426" t="4248" r="5789" b="4450" baseLine="4397" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="4248" r="1795" b="4450">qual</wd>

<space/>

<wd l="1920" t="4248" r="2170" b="4406">ele</wd>

<space/>

<wd l="2294" t="4253" r="2549" b="4406">De</wd>

<space/>

<wd l="2674" t="4248" r="3192" b="4406">forma</wd>

<space/>

<wd l="3312" t="4248" r="4195" b="4406">Discarada</wd>

<space/>

<wd l="4320" t="4248" r="5011" b="4406">informa</wd>

<space/>

<wd l="5136" t="4296" r="5386" b="4406">ser</wd>

<space/>

<wd l="5506" t="4296" r="5789" b="4406">um</wd>

<space/>

</ln>

<ln l="1416" t="4498" r="5784" b="4699" baseLine="4651">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1416" t="4498" r="2098" b="4699">produto</wd>

<space/>

</run>

<wd l="2218" t="4498" r="3442" b="4656"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">ORIGINAL</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">!”</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="3562" t="4498" r="4358" b="4699">(literally:</wd>

<space/>

<wd l="4488" t="4498" r="4987" b="4656">Make</wd>

<space/>

<wd l="5102" t="4498" r="5784" b="4656">Contact</wd>

<space/>

</run>

</ln>

<ln l="1421" t="4752" r="5779" b="4910" baseLine="4906" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="4752" r="1810" b="4910">with</wd>

<space/>

<wd l="1934" t="4800" r="2030" b="4910">a</wd>

<space/>

<wd l="2155" t="4752" r="2659" b="4910">Seller</wd>

<space/>

<wd l="2779" t="4752" r="3096" b="4910">and</wd>

<space/>

<wd l="3211" t="4752" r="3418" b="4910">he</wd>

<space/>

<wd l="3542" t="4752" r="4214" b="4910">informs</wd>

<space/>

<wd l="4344" t="4757" r="4517" b="4906">In</wd>

<space/>

<wd l="4642" t="4800" r="4738" b="4910">a</wd>

<space/>

<wd l="4862" t="4752" r="5779" b="4910">Shameless</wd>

<space/>

</ln>

<ln l="1421" t="5006" r="5784" b="5208" baseLine="5155">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1421" t="5054" r="2078" b="5165">manner</wd>

<space/>

<wd l="2170" t="5026" r="2338" b="5165">to</wd>

<space/>

<wd l="2434" t="5006" r="2640" b="5165">be</wd>

<space/>

<wd l="2741" t="5054" r="2947" b="5165">an</wd>

<space/>

<wd l="3043" t="5006" r="4099" b="5165">ORIGINAL</wd>

<space/>

<wd l="4186" t="5006" r="5150" b="5208">product!”).</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="5266" t="5006" r="5784" b="5165">These</wd>

<space/>

</run>

</ln>

<ln l="1416" t="5261" r="5808" b="5462" baseLine="5410" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="5261" r="2434" b="5462">phenomena</wd>

<space/>

<wd l="2491" t="5309" r="2976" b="5419">cause</wd>

<space/>

<wd l="3029" t="5261" r="3845" b="5462">problems</wd>

<space/>

<wd l="3912" t="5261" r="4166" b="5419">for</wd>

<space/>

<wd l="4219" t="5261" r="4488" b="5419">the</wd>

<space/>

<wd l="4546" t="5261" r="5549" b="5462">recognition</wd>

<space/>

<wd l="5606" t="5261" r="5808" b="5419">of</wd>

<space/>

</ln>

<ln l="1421" t="5510" r="5808" b="5712" baseLine="5664" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="5510" r="2006" b="5669">named</wd>

<space/>

<wd l="2165" t="5510" r="2794" b="5669">entities</wd>

<space/>

<wd l="2962" t="5510" r="3278" b="5669">and</wd>

<space/>

<wd l="3437" t="5510" r="3691" b="5669">for</wd>

<space/>

<wd l="3845" t="5510" r="4114" b="5669">the</wd>

<space/>

<wd l="4282" t="5510" r="5443" b="5712">segmentation</wd>

<space/>

<wd l="5606" t="5510" r="5808" b="5669">of</wd>

<space/>

</ln>

<ln l="1430" t="5765" r="5784" b="5966" baseLine="5914" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="5784" r="2256" b="5923">sentences</wd>

<space/>

<wd l="2338" t="5765" r="2779" b="5923">since</wd>

<space/>

<wd l="2846" t="5765" r="3240" b="5923">both</wd>

<space/>

<wd l="3307" t="5765" r="3744" b="5923">tasks</wd>

<space/>

<wd l="3816" t="5813" r="4109" b="5923">use</wd>

<space/>

<wd l="4181" t="5765" r="4757" b="5966">capital</wd>

<space/>

<wd l="4834" t="5765" r="5362" b="5923">letters</wd>

<space/>

<wd l="5438" t="5813" r="5611" b="5923">as</wd>

<space/>

<wd l="5688" t="5813" r="5784" b="5923">a</wd>

<space/>

</ln>

<ln l="1426" t="6019" r="5789" b="6221" baseLine="6168" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="6019" r="1829" b="6178">clue.</wd>

<space/>

<wd l="1896" t="6019" r="3134" b="6178">Lexical-based</wd>

<space/>

<wd l="3197" t="6019" r="4013" b="6221">strategies</wd>

<space/>

<wd l="4080" t="6067" r="4382" b="6178">can</wd>

<space/>

<wd l="4440" t="6019" r="4814" b="6221">help</wd>

<space/>

<wd l="4872" t="6038" r="5040" b="6178">to</wd>

<space/>

<wd l="5107" t="6019" r="5789" b="6221">identify</wd>

<space/>

</ln>

<ln l="1421" t="6269" r="5779" b="6461" baseLine="6422" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="6269" r="2006" b="6427">named</wd>

<space/>

<wd l="2069" t="6269" r="2698" b="6427">entities</wd>

<space/>

<wd l="2765" t="6269" r="3389" b="6427">written</wd>

<space/>

<wd l="3451" t="6269" r="3619" b="6422">in</wd>

<space/>

<wd l="3686" t="6269" r="4603" b="6427">lowercase.</wd>

<space/>

<wd l="4680" t="6274" r="5530" b="6461">However,</wd>

<space/>

<wd l="5602" t="6317" r="5779" b="6427">as</wd>

<space/>

</ln>

<ln l="1416" t="6523" r="5784" b="6725" baseLine="6677" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="6571" r="1997" b="6725">proper</wd>

<space/>

<wd l="2045" t="6571" r="2602" b="6682">names</wd>

<space/>

<wd l="2659" t="6523" r="2981" b="6682">and</wd>

<space/>

<wd l="3029" t="6571" r="3874" b="6725">acronyms</wd>

<space/>

<wd l="3931" t="6571" r="4195" b="6682">are</wd>

<space/>

<wd l="4253" t="6523" r="4421" b="6677">in</wd>

<space/>

<wd l="4474" t="6571" r="4896" b="6725">open</wd>

<space/>

<wd l="4949" t="6523" r="5606" b="6715">classes,</wd>

<space/>

<wd l="5664" t="6523" r="5784" b="6682">it</wd>

<space/>

</ln>

<ln l="1426" t="6778" r="5784" b="6979" baseLine="6926" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="6778" r="1560" b="6936">is</wd>

<space/>

<wd l="1622" t="6778" r="2472" b="6936">infeasible</wd>

<space/>

<wd l="2525" t="6797" r="2693" b="6936">to</wd>

<space/>

<wd l="2750" t="6797" r="3552" b="6936">construct</wd>

<space/>

<wd l="3610" t="6826" r="3706" b="6936">a</wd>

<space/>

<wd l="3758" t="6778" r="5083" b="6979">comprehensive</wd>

<space/>

<wd l="5141" t="6778" r="5784" b="6936">lexicon</wd>

<space/>

</ln>

<ln l="1426" t="7027" r="5784" b="7229" baseLine="7181" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="7027" r="1680" b="7186">for</wd>

<space/>

<wd l="1757" t="7027" r="2242" b="7186">them.</wd>

<space/>

<wd l="2338" t="7027" r="3398" b="7229">Fortunately,</wd>

<space/>

<wd l="3490" t="7027" r="3758" b="7186">the</wd>

<space/>

<wd l="3840" t="7027" r="4517" b="7229">product</wd>

<space/>

<wd l="4603" t="7027" r="5280" b="7186">reviews</wd>

<space/>

<wd l="5371" t="7027" r="5784" b="7186">have</wd>

<space/>

</ln>

<ln l="1421" t="7282" r="5779" b="7440" baseLine="7435" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="7282" r="2218" b="7440">metadata</wd>

<space/>

<wd l="2285" t="7282" r="2616" b="7440">that</wd>

<space/>

<wd l="2688" t="7282" r="3331" b="7440">contain</wd>

<space/>

<wd l="3403" t="7301" r="3830" b="7440">most</wd>

<space/>

<wd l="3902" t="7282" r="4104" b="7440">of</wd>

<space/>

<wd l="4147" t="7282" r="4416" b="7440">the</wd>

<space/>

<wd l="4488" t="7282" r="5078" b="7440">named</wd>

<space/>

<wd l="5150" t="7282" r="5779" b="7440">entities</wd>

<space/>

</ln>

<ln l="1426" t="7536" r="5779" b="7738" baseLine="7685" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="7536" r="1939" b="7694">found</wd>

<space/>

<wd l="2074" t="7536" r="2242" b="7690">in</wd>

<space/>

<wd l="2376" t="7536" r="2645" b="7694">the</wd>

<space/>

<wd l="2784" t="7536" r="3672" b="7738">respective</wd>

<space/>

<wd l="3811" t="7555" r="4277" b="7728">texts,</wd>

<space/>

<wd l="4421" t="7536" r="4958" b="7694">which</wd>

<space/>

<wd l="5098" t="7536" r="5467" b="7738">help</wd>

<space/>

<wd l="5611" t="7555" r="5779" b="7694">to</wd>

<space/>

</ln>

<ln l="1426" t="7790" r="5789" b="7992" baseLine="7939" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="7810" r="2227" b="7949">construct</wd>

<space/>

<wd l="2314" t="7838" r="2410" b="7949">a</wd>

<space/>

<wd l="2491" t="7790" r="4123" b="7992">domain-dependent</wd>

<space/>

<wd l="4210" t="7790" r="4853" b="7949">lexicon</wd>

<space/>

<wd l="4939" t="7790" r="5141" b="7949">of</wd>

<space/>

<wd l="5198" t="7790" r="5789" b="7949">named</wd>

<space/>

</ln>

<ln l="1426" t="8040" r="5774" b="8242" baseLine="8194" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="8040" r="2098" b="8198">entities.</wd>

<space/>

<wd l="2170" t="8040" r="2510" b="8198">The</wd>

<space/>

<wd l="2573" t="8040" r="3312" b="8242">opposite</wd>

<space/>

<wd l="3370" t="8040" r="4109" b="8242">problem</wd>

<space/>

<wd l="4166" t="8040" r="4517" b="8198">also</wd>

<space/>

<wd l="4584" t="8040" r="5126" b="8232">exists,</wd>

<space/>

<wd l="5189" t="8040" r="5520" b="8198">that</wd>

<space/>

<wd l="5582" t="8040" r="5774" b="8232">is,</wd>

<space/>

</ln>

<ln l="1421" t="8294" r="5789" b="8496" baseLine="8443" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="8314" r="1584" b="8453">to</wd>

<space/>

<wd l="1646" t="8294" r="2213" b="8453">decide</wd>

<space/>

<wd l="2266" t="8294" r="2976" b="8453">whether</wd>

<space/>

<wd l="3024" t="8342" r="3120" b="8453">a</wd>

<space/>

<wd l="3168" t="8294" r="3624" b="8453">word</wd>

<space/>

<wd l="3672" t="8294" r="4291" b="8453">written</wd>

<space/>

<wd l="4344" t="8294" r="4512" b="8448">in</wd>

<space/>

<wd l="4565" t="8342" r="5438" b="8496">uppercase</wd>

<space/>

<wd l="5496" t="8294" r="5635" b="8453">is</wd>

<space/>

<wd l="5693" t="8342" r="5789" b="8453">a</wd>

<space/>

</ln>

<ln l="1421" t="8549" r="3178" b="8750" baseLine="8698" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="8549" r="2006" b="8707">named</wd>

<space/>

<wd l="2064" t="8549" r="2563" b="8750">entity</wd>

<space/>

<wd l="2621" t="8597" r="2803" b="8707">or</wd>

<space/>

<wd l="2856" t="8568" r="3178" b="8707">not.</wd>

</ln>

</para>

<para l="1402" t="8798" r="5803" b="12542" alignment="justified" spaceBefore="1" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="1646" t="8798" r="5784" b="9000" baseLine="8952" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1646" t="8798" r="2357" b="9000">Missing</wd>

<space/>

<wd l="2554" t="8798" r="3600" b="9000">punctuation</wd>

<space/>

<wd l="3806" t="8798" r="3946" b="8957">is</wd>

<space/>

<wd l="4157" t="8798" r="4814" b="8957">another</wd>

<space/>

<wd l="5021" t="8846" r="5784" b="8957">common</wd>

<space/>

</ln>

<ln l="1426" t="9053" r="5784" b="9254" baseLine="9206" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="9053" r="2587" b="9211">characteristic</wd>

<space/>

<wd l="2861" t="9053" r="3062" b="9211">of</wd>

<space/>

<wd l="3302" t="9053" r="3979" b="9254">product</wd>

<space/>

<wd l="4243" t="9053" r="4978" b="9245">reviews,</wd>

<space/>

<wd l="5246" t="9053" r="5784" b="9211">which</wd>

<space/>

</ln>

<ln l="1402" t="9307" r="5774" b="9509" baseLine="9456" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1402" t="9307" r="2333" b="9509">jeopardize</wd>

<space/>

<wd l="2472" t="9326" r="3221" b="9466">sentence</wd>

<space/>

<wd l="3350" t="9307" r="3672" b="9466">and</wd>

<space/>

<wd l="3797" t="9307" r="4339" b="9466">clause</wd>

<space/>

<wd l="4478" t="9307" r="5774" b="9509">segmentations.</wd>

<space/>

</ln>

<ln l="1430" t="9557" r="5789" b="9758" baseLine="9710" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="9557" r="1915" b="9715">Some</wd>

<space/>

<wd l="1992" t="9557" r="2674" b="9715">reviews</wd>

<space/>

<wd l="2750" t="9557" r="3629" b="9758">reproduce</wd>

<space/>

<wd l="3710" t="9605" r="3806" b="9715">a</wd>

<space/>

<wd l="3874" t="9557" r="4272" b="9715">kind</wd>

<space/>

<wd l="4344" t="9557" r="4546" b="9715">of</wd>

<space/>

<wd l="4594" t="9557" r="5789" b="9758">uninterrupted</wd>

<space/>

</ln>

<ln l="1430" t="9811" r="5779" b="10013" baseLine="9965" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="9830" r="2006" b="9970">stream</wd>

<space/>

<wd l="2112" t="9811" r="2314" b="9970">of</wd>

<space/>

<wd l="2405" t="9811" r="3696" b="10003">consciousness,</wd>

<space/>

<wd l="3806" t="9811" r="4464" b="10013">making</wd>

<space/>

<wd l="4570" t="9811" r="4690" b="9970">it</wd>

<space/>

<wd l="4800" t="9811" r="5506" b="9970">difficult</wd>

<space/>

<wd l="5611" t="9830" r="5779" b="9970">to</wd>

<space/>

</ln>

<ln l="1416" t="10066" r="5774" b="10267" baseLine="10214" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="10085" r="2270" b="10267">punctuate</wd>

<space/>

<wd l="2333" t="10066" r="2602" b="10224">the</wd>

<space/>

<wd l="2659" t="10085" r="3038" b="10258">text,</wd>

<space/>

<wd l="3110" t="10114" r="3518" b="10224">even</wd>

<space/>

<wd l="3586" t="10066" r="3840" b="10224">for</wd>

<space/>

<wd l="3898" t="10114" r="3994" b="10224">a</wd>

<space/>

<wd l="4051" t="10066" r="4690" b="10224">human.</wd>

<space/>

<wd l="4771" t="10070" r="4944" b="10219">In</wd>

<space/>

<wd l="5006" t="10066" r="5774" b="10258">addition,</wd>

<space/>

</ln>

<ln l="1421" t="10320" r="5765" b="10522" baseLine="10469" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="10339" r="1843" b="10478">most</wd>

<space/>

<wd l="1934" t="10320" r="2616" b="10522">product</wd>

<space/>

<wd l="2712" t="10320" r="3394" b="10478">reviews</wd>

<space/>

<wd l="3494" t="10320" r="4104" b="10478">consist</wd>

<space/>

<wd l="4205" t="10320" r="4406" b="10478">of</wd>

<space/>

<wd l="4478" t="10320" r="4915" b="10478">three</wd>

<space/>

<wd l="5026" t="10320" r="5765" b="10478">sections:</wd>

<space/>

</ln>

<ln l="1421" t="10570" r="5779" b="10771" baseLine="10723" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="10574" r="1858" b="10762">Pros,</wd>

<space/>

<wd l="2054" t="10570" r="2549" b="10762">Cons,</wd>

<space/>

<wd l="2746" t="10570" r="3067" b="10728">and</wd>

<space/>

<wd l="3250" t="10570" r="3936" b="10728">General</wd>

<space/>

<wd l="4133" t="10570" r="4891" b="10771">Opinion.</wd>

<space/>

<wd l="5093" t="10570" r="5779" b="10728">General</wd>

<space/>

</ln>

<ln l="1426" t="10824" r="5779" b="11026" baseLine="10978" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="10824" r="2141" b="11026">Opinion</wd>

<space/>

<wd l="2194" t="10824" r="2832" b="11026">usually</wd>

<space/>

<wd l="2890" t="10824" r="3029" b="10982">is</wd>

<space/>

<wd l="3091" t="10872" r="3187" b="10982">a</wd>

<space/>

<wd l="3235" t="10824" r="3682" b="11026">plain</wd>

<space/>

<wd l="3734" t="10843" r="4114" b="11016">text,</wd>

<space/>

<wd l="4171" t="10824" r="4459" b="10982">but</wd>

<space/>

<wd l="4512" t="10829" r="4901" b="10982">Pros</wd>

<space/>

<wd l="4963" t="10824" r="5280" b="10982">and</wd>

<space/>

<wd l="5338" t="10824" r="5779" b="10982">Cons</wd>

<space/>

</ln>

<ln l="1421" t="11078" r="5774" b="11280" baseLine="11227" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="11126" r="1795" b="11280">may</wd>

<space/>

<wd l="1906" t="11098" r="2549" b="11280">present</wd>

<space/>

<wd l="2674" t="11078" r="3192" b="11280">single</wd>

<space/>

<wd l="3307" t="11078" r="3840" b="11237">words</wd>

<space/>

<wd l="3965" t="11078" r="4469" b="11280">(Pros:</wd>

<space/>

<wd l="4608" t="11078" r="5774" b="11280">inexpensive),</wd>

<space/>

</ln>

<ln l="1421" t="11328" r="5803" b="11530" baseLine="11482" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="11376" r="1862" b="11486">noun</wd>

<space/>

<wd l="1920" t="11328" r="2582" b="11530">phrases</wd>

<space/>

<wd l="2654" t="11328" r="3158" b="11530">(Pros:</wd>

<space/>

<wd l="3235" t="11328" r="3850" b="11530">battery</wd>

<space/>

<wd l="3922" t="11328" r="4330" b="11530">life),</wd>

<space/>

<wd l="4397" t="11328" r="5117" b="11486">bulleted</wd>

<space/>

<wd l="5184" t="11328" r="5530" b="11486">lists</wd>

<space/>

<wd l="5602" t="11328" r="5803" b="11486">of</wd>

<space/>

</ln>

<ln l="1421" t="11582" r="5774" b="11784" baseLine="11736" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="11582" r="1954" b="11741">words</wd>

<space/>

<wd l="2040" t="11582" r="2357" b="11741">and</wd>

<space/>

<wd l="2429" t="11630" r="2870" b="11741">noun</wd>

<space/>

<wd l="2942" t="11582" r="3658" b="11784">phrases,</wd>

<space/>

<wd l="3744" t="11630" r="3926" b="11741">or</wd>

<space/>

<wd l="4008" t="11582" r="4805" b="11784">complete</wd>

<space/>

<wd l="4896" t="11602" r="5774" b="11741">sentences.</wd>

<space/>

</ln>

<ln l="1421" t="11837" r="5784" b="12038" baseLine="11986" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="11842" r="1728" b="11995">For</wd>

<space/>

<wd l="1790" t="11837" r="2102" b="11995">this</wd>

<space/>

<wd l="2174" t="11885" r="2794" b="12029">reason,</wd>

<space/>

<wd l="2870" t="11837" r="2990" b="11995">it</wd>

<space/>

<wd l="3058" t="11837" r="3197" b="11995">is</wd>

<space/>

<wd l="3274" t="11837" r="4291" b="12038">challenging</wd>

<space/>

<wd l="4358" t="11856" r="4526" b="11995">to</wd>

<space/>

<wd l="4589" t="11856" r="5448" b="12038">punctuate</wd>

<space/>

<wd l="5520" t="11837" r="5784" b="11995">the</wd>

<space/>

</ln>

<ln l="1421" t="12091" r="5779" b="12283" baseLine="12240" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="12096" r="1805" b="12250">Pros</wd>

<space/>

<wd l="1992" t="12091" r="2309" b="12250">and</wd>

<space/>

<wd l="2486" t="12091" r="2928" b="12250">Cons</wd>

<space/>

<wd l="3120" t="12091" r="3869" b="12283">sections,</wd>

<space/>

<wd l="4056" t="12091" r="4378" b="12250">and</wd>

<space/>

<wd l="4546" t="12091" r="4814" b="12250">the</wd>

<space/>

<wd l="5002" t="12091" r="5779" b="12250">solutions</wd>

<space/>

</ln>

<ln l="1430" t="12341" r="4766" b="12542" baseLine="12494" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="12341" r="2352" b="12499">sometimes</wd>

<space/>

<wd l="2414" t="12341" r="3038" b="12542">require</wd>

<space/>

<wd l="3096" t="12341" r="3850" b="12542">arbitrary</wd>

<space/>

<wd l="3907" t="12341" r="4766" b="12499">decisions.</wd>

</ln>

</para>

<para l="1421" t="12595" r="5808" b="13555" alignment="justified" spaceBefore="1" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="1651" t="12595" r="5789" b="12797" baseLine="12744" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1651" t="12600" r="1829" b="12749">In</wd>

<space/>

<wd l="1872" t="12595" r="2141" b="12754">the</wd>

<space/>

<wd l="2189" t="12643" r="2770" b="12797">corpus</wd>

<space/>

<wd l="2818" t="12595" r="3019" b="12754">of</wd>

<space/>

<wd l="3038" t="12595" r="3715" b="12797">product</wd>

<space/>

<wd l="3754" t="12595" r="4488" b="12787">reviews,</wd>

<space/>

<wd l="4541" t="12595" r="5088" b="12754">unlike</wd>

<space/>

<wd l="5136" t="12595" r="5304" b="12749">in</wd>

<space/>

<wd l="5357" t="12595" r="5789" b="12754">short</wd>

<space/>

</ln>

<ln l="1421" t="12850" r="5808" b="13051" baseLine="12998" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1421" t="12898" r="2294" b="13051">messages,</wd>

<space/>

<wd l="2414" t="12850" r="2870" b="13008">word</wd>

<space/>

<wd l="2981" t="12850" r="4210" b="13042">abbreviations,</wd>

<space/>

<wd l="4334" t="12850" r="5491" b="13051">agglutination</wd>

<space/>

<wd l="5606" t="12850" r="5808" b="13008">of</wd>

<space/>

</ln>

<ln l="1430" t="13099" r="5789" b="13301" baseLine="13253" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1430" t="13099" r="2040" b="13258">several</wd>

<space/>

<wd l="2107" t="13099" r="2678" b="13258">tokens</wd>

<space/>

<wd l="2755" t="13099" r="3091" b="13258">into</wd>

<space/>

<wd l="3168" t="13147" r="3264" b="13258">a</wd>

<space/>

<wd l="3336" t="13099" r="3850" b="13301">single</wd>

<space/>

<wd l="3922" t="13147" r="4286" b="13291">one,</wd>

<space/>

<wd l="4368" t="13099" r="4685" b="13258">and</wd>

<space/>

<wd l="4757" t="13099" r="5789" b="13301">suppression</wd>

<space/>

</ln>

<ln l="1426" t="13349" r="4560" b="13555" baseLine="13507" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1426" t="13354" r="1627" b="13512">of</wd>

<space/>

<wd l="1661" t="13354" r="2750" b="13555">grammatical</wd>

<space/>

<wd l="2813" t="13354" r="3346" b="13512">words</wd>

<space/>

<wd l="3403" t="13354" r="3917" b="13555">rarely</wd>

<space/>

<wd l="3974" t="13402" r="4464" b="13512">occur</wd>

<space/>

<wd l="4531" t="13483" r="4560" b="13512">.</wd>

</ln>

</para>

<para l="1426" t="13814" r="5770" b="14261" alignment="left" li="360" spaceBefore="207" fli="-360" lsp="exactly" lspExact="274" language="en">

<ln l="1426" t="13814" r="5770" b="14035" baseLine="13982" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="13819" r="1594" b="13987">4.</wd>

<space/>

<wd l="1781" t="13819" r="1954" b="13982">A</wd>

<space/>

<wd l="2141" t="13819" r="3518" b="13987">lexicon-based</wd>

<space/>

<wd l="3715" t="13819" r="4675" b="14035">approach</wd>

<space/>

<wd l="4867" t="13829" r="5059" b="13987">to</wd>

<space/>

<wd l="5256" t="13819" r="5770" b="13987">UGC</wd>

<space/>

</ln>

<ln l="1786" t="14088" r="3211" b="14261" baseLine="14256" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1786" t="14093" r="3211" b="14261">normalization</wd>

</ln>

</para>

<para l="1421" t="14520" r="5789" b="15226" alignment="justified" spaceBefore="157" fli="216" lsp="exactly" lspExact="248" language="en">

<ln l="1651" t="14520" r="5784" b="14678" baseLine="14669" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="14520" r="1987" b="14678">The</wd>

<space/>

<wd l="2136" t="14539" r="2683" b="14678">nature</wd>

<space/>

<wd l="2837" t="14520" r="3038" b="14678">of</wd>

<space/>

<wd l="3158" t="14520" r="3427" b="14678">the</wd>

<space/>

<wd l="3581" t="14520" r="4474" b="14678">deviations</wd>

<space/>

<wd l="4632" t="14520" r="5472" b="14678">described</wd>

<space/>

<wd l="5621" t="14520" r="5784" b="14674">in</wd>

<space/>

</ln>

<ln l="1430" t="14770" r="5789" b="14971" baseLine="14923" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="14770" r="2078" b="14928">Section</wd>

<space/>

<wd l="2246" t="14770" r="2338" b="14928">3</wd>

<space/>

<wd l="2515" t="14770" r="2928" b="14928">have</wd>

<space/>

<wd l="3091" t="14770" r="3974" b="14928">motivated</wd>

<space/>

<wd l="4133" t="14818" r="4325" b="14928">us</wd>

<space/>

<wd l="4493" t="14789" r="4661" b="14928">to</wd>

<space/>

<wd l="4834" t="14770" r="5520" b="14971">develop</wd>

<space/>

<wd l="5693" t="14818" r="5789" b="14928">a</wd>

<space/>

</ln>

<ln l="1421" t="15024" r="5770" b="15226" baseLine="15178" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="15024" r="2640" b="15182">normalization</wd>

<space/>

<wd l="2731" t="15024" r="3072" b="15182">tool</wd>

<space/>

<wd l="3168" t="15024" r="3845" b="15182">tailored</wd>

<space/>

<wd l="3936" t="15024" r="4190" b="15182">for</wd>

<space/>

<wd l="4277" t="15024" r="4954" b="15226">product</wd>

<space/>

<wd l="5045" t="15024" r="5770" b="15182">reviews.</wd>

</ln>

</para>

</column>

<column l="6113" t="1417" r="10524" b="15235">

<para l="6120" t="1464" r="10512" b="3941" alignment="justified" spaceBefore="3" lsp="exactly" lspExact="252" language="en">

<ln l="6130" t="1464" r="10483" b="1666" baseLine="1613" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6130" t="1464" r="6466" b="1622">The</wd>

<space/>

<wd l="6533" t="1464" r="6902" b="1666">goal</wd>

<space/>

<wd l="6970" t="1464" r="7109" b="1622">is</wd>

<space/>

<wd l="7171" t="1483" r="7339" b="1622">to</wd>

<space/>

<wd l="7406" t="1464" r="8280" b="1622">normalize</wd>

<space/>

<wd l="8342" t="1464" r="8606" b="1622">the</wd>

<space/>

<wd l="8674" t="1464" r="9571" b="1622">deviations</wd>

<space/>

<wd l="9638" t="1464" r="9950" b="1622">due</wd>

<space/>

<wd l="10013" t="1483" r="10229" b="1622">to:</wd>

<space/>

<wd l="10334" t="1464" r="10483" b="1666">1)</wd>

<space/>

</ln>

<ln l="6125" t="1718" r="10512" b="1910" baseLine="1867" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6125" t="1718" r="6394" b="1877">the</wd>

<space/>

<wd l="6552" t="1766" r="6926" b="1877">case</wd>

<space/>

<wd l="7080" t="1766" r="7421" b="1910">use,</wd>

<space/>

<wd l="7589" t="1718" r="7757" b="1872">in</wd>

<space/>

<wd l="7910" t="1718" r="8338" b="1877">what</wd>

<space/>

<wd l="8496" t="1766" r="9274" b="1877">concerns</wd>

<space/>

<wd l="9432" t="1718" r="9701" b="1877">the</wd>

<space/>

<wd l="9854" t="1766" r="10147" b="1877">use</wd>

<space/>

<wd l="10310" t="1718" r="10512" b="1877">of</wd>

<space/>

</ln>

<ln l="6130" t="1968" r="10488" b="2170" baseLine="2122" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6130" t="1968" r="7003" b="2126">lowercase</wd>

<space/>

<wd l="7070" t="1968" r="7699" b="2126">instead</wd>

<space/>

<wd l="7762" t="1968" r="7963" b="2126">of</wd>

<space/>

<wd l="8006" t="2016" r="8933" b="2170">uppercase;</wd>

<space/>

<wd l="9019" t="1968" r="9192" b="2170">2)</wd>

<space/>

<wd l="9264" t="1968" r="9528" b="2126">the</wd>

<space/>

<wd l="9605" t="1968" r="10488" b="2126">correction</wd>

<space/>

</ln>

<ln l="6130" t="2222" r="10493" b="2424" baseLine="2376" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6130" t="2222" r="6331" b="2381">of</wd>

<space/>

<wd l="6350" t="2222" r="7498" b="2424">misspellings,</wd>

<space/>

<wd l="7555" t="2242" r="8122" b="2424">except</wd>

<space/>

<wd l="8170" t="2222" r="8424" b="2381">for</wd>

<space/>

<wd l="8462" t="2222" r="8928" b="2381">those</wd>

<space/>

<wd l="8976" t="2270" r="9432" b="2381">cases</wd>

<space/>

<wd l="9480" t="2222" r="9811" b="2381">that</wd>

<space/>

<wd l="9859" t="2222" r="10493" b="2424">depend</wd>

<space/>

</ln>

<ln l="6130" t="2477" r="10488" b="2678" baseLine="2626" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6130" t="2525" r="6346" b="2635">on</wd>

<space/>

<wd l="6418" t="2477" r="7325" b="2635">contextual</wd>

<space/>

<wd l="7402" t="2477" r="7843" b="2635">clues</wd>

<space/>

<wd l="7915" t="2496" r="8083" b="2635">to</wd>

<space/>

<wd l="8160" t="2477" r="9322" b="2678">disambiguate</wd>

<space/>

<wd l="9394" t="2496" r="9720" b="2635">two</wd>

<space/>

<wd l="9792" t="2477" r="10488" b="2678">existing</wd>

<space/>

</ln>

<ln l="6125" t="2726" r="10507" b="2928" baseLine="2880" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6125" t="2726" r="6658" b="2885">words</wd>

<space/>

<wd l="6826" t="2726" r="6994" b="2880">in</wd>

<space/>

<wd l="7152" t="2731" r="8174" b="2928">Portuguese;</wd>

<space/>

<wd l="8352" t="2726" r="8525" b="2928">3)</wd>

<space/>

<wd l="8688" t="2726" r="8957" b="2885">the</wd>

<space/>

<wd l="9130" t="2726" r="10147" b="2885">substitution</wd>

<space/>

<wd l="10310" t="2726" r="10507" b="2885">of</wd>

<space/>

</ln>

<ln l="6130" t="2981" r="10483" b="3182" baseLine="3134" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6130" t="2986" r="6806" b="3139">Internet</wd>

<space/>

<wd l="6869" t="2981" r="7325" b="3182">slang</wd>

<space/>

<wd l="7368" t="2981" r="7594" b="3182">by</wd>

<space/>

<wd l="7656" t="2981" r="8400" b="3139">standard</wd>

<space/>

<wd l="8448" t="2981" r="9235" b="3182">language</wd>

<space/>

<wd l="9288" t="2981" r="9878" b="3173">words,</wd>

<space/>

<wd l="9941" t="2981" r="10258" b="3139">and</wd>

<space/>

<wd l="10306" t="2981" r="10483" b="3182">4)</wd>

<space/>

</ln>

<ln l="6125" t="3235" r="10493" b="3437" baseLine="3384" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6125" t="3235" r="6394" b="3394">the</wd>

<space/>

<wd l="6658" t="3235" r="7421" b="3394">insertion</wd>

<space/>

<wd l="7680" t="3235" r="7882" b="3394">of</wd>

<space/>

<wd l="8117" t="3235" r="8803" b="3437">missing</wd>

<space/>

<wd l="9053" t="3235" r="9706" b="3437">periods</wd>

<space/>

<wd l="9970" t="3235" r="10493" b="3437">(other</wd>

<space/>

</ln>

<ln l="6120" t="3490" r="10483" b="3691" baseLine="3638" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6120" t="3490" r="7166" b="3691">punctuation</wd>

<space/>

<wd l="7277" t="3490" r="7805" b="3648">marks</wd>

<space/>

<wd l="7925" t="3490" r="8261" b="3648">will</wd>

<space/>

<wd l="8376" t="3490" r="8582" b="3648">be</wd>

<space/>

<wd l="8702" t="3490" r="9571" b="3648">addressed</wd>

<space/>

<wd l="9682" t="3490" r="9850" b="3643">in</wd>

<space/>

<wd l="9970" t="3490" r="10483" b="3648">future</wd>

<space/>

</ln>

<ln l="6125" t="3739" r="6691" b="3941" baseLine="3893" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6125" t="3739" r="6691" b="3941">work).</wd>

</ln>

</para>

<para l="6120" t="3994" r="10493" b="6221" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="6355" t="3994" r="10493" b="4195" baseLine="4142" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="3994" r="6715" b="4152">One</wd>

<space/>

<wd l="6955" t="3994" r="7157" b="4152">of</wd>

<space/>

<wd l="7358" t="3994" r="7627" b="4152">the</wd>

<space/>

<wd l="7862" t="3994" r="8784" b="4195">challenges</wd>

<space/>

<wd l="9024" t="3994" r="9226" b="4152">of</wd>

<space/>

<wd l="9427" t="3994" r="10166" b="4195">building</wd>

<space/>

<wd l="10397" t="4042" r="10493" b="4152">a</wd>

<space/>

</ln>

<ln l="6125" t="4248" r="10488" b="4406" baseLine="4397" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="4248" r="7344" b="4406">normalization</wd>

<space/>

<wd l="7483" t="4248" r="7819" b="4406">tool</wd>

<space/>

<wd l="7963" t="4248" r="8458" b="4406">refers</wd>

<space/>

<wd l="8597" t="4267" r="8765" b="4406">to</wd>

<space/>

<wd l="8909" t="4248" r="9288" b="4406">how</wd>

<space/>

<wd l="9422" t="4267" r="9590" b="4406">to</wd>

<space/>

<wd l="9734" t="4248" r="10488" b="4406">combine</wd>

<space/>

</ln>

<ln l="6130" t="4498" r="10488" b="4699" baseLine="4651" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="4498" r="6883" b="4656">different</wd>

<space/>

<wd l="6946" t="4498" r="8165" b="4656">normalization</wd>

<space/>

<wd l="8227" t="4498" r="9192" b="4699">procedures</wd>

<space/>

<wd l="9264" t="4498" r="9432" b="4651">in</wd>

<space/>

<wd l="9499" t="4498" r="9893" b="4656">such</wd>

<space/>

<wd l="9965" t="4546" r="10061" b="4656">a</wd>

<space/>

<wd l="10118" t="4546" r="10488" b="4699">way</wd>

<space/>

</ln>

<ln l="6125" t="4752" r="10483" b="4954" baseLine="4906" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="4752" r="6456" b="4910">that</wd>

<space/>

<wd l="6518" t="4752" r="6787" b="4910">the</wd>

<space/>

<wd l="6854" t="4752" r="7349" b="4910">effect</wd>

<space/>

<wd l="7416" t="4752" r="7618" b="4910">of</wd>

<space/>

<wd l="7661" t="4800" r="7757" b="4910">a</wd>

<space/>

<wd l="7814" t="4752" r="8698" b="4954">procedure</wd>

<space/>

<wd l="8765" t="4752" r="9158" b="4910">does</wd>

<space/>

<wd l="9226" t="4771" r="9509" b="4910">not</wd>

<space/>

<wd l="9547" t="4752" r="10483" b="4954">jeopardize</wd>

<space/>

</ln>

<ln l="6125" t="5006" r="10483" b="5208" baseLine="5155" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="5006" r="6394" b="5165">the</wd>

<space/>

<wd l="6466" t="5006" r="7435" b="5208">subsequent</wd>

<space/>

<wd l="7502" t="5054" r="7944" b="5165">ones.</wd>

<space/>

<wd l="8016" t="5011" r="8328" b="5165">For</wd>

<space/>

<wd l="8395" t="5006" r="9182" b="5208">example,</wd>

<space/>

<wd l="9250" t="5006" r="9691" b="5165">there</wd>

<space/>

<wd l="9758" t="5054" r="10022" b="5165">are</wd>

<space/>

<wd l="10085" t="5054" r="10483" b="5165">non-</wd>

</ln>

<ln l="6134" t="5261" r="10483" b="5462" baseLine="5410" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="5261" r="6878" b="5419">standard</wd>

<space/>

<wd l="6979" t="5261" r="7512" b="5419">words</wd>

<space/>

<wd l="7627" t="5261" r="8054" b="5419">from</wd>

<space/>

<wd l="8165" t="5266" r="8846" b="5419">Internet</wd>

<space/>

<wd l="8966" t="5261" r="9418" b="5462">slang</wd>

<space/>

<wd l="9533" t="5309" r="9706" b="5419">as</wd>

<space/>

<wd l="9821" t="5261" r="10195" b="5419">well</wd>

<space/>

<wd l="10310" t="5309" r="10483" b="5419">as</wd>

<space/>

</ln>

<ln l="6125" t="5510" r="10483" b="5712" baseLine="5664" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="5510" r="6715" b="5669">named</wd>

<space/>

<wd l="6816" t="5510" r="7440" b="5669">entities</wd>

<space/>

<wd l="7546" t="5510" r="8170" b="5669">written</wd>

<space/>

<wd l="8275" t="5510" r="8443" b="5664">in</wd>

<space/>

<wd l="8544" t="5510" r="9418" b="5669">lowercase</wd>

<space/>

<wd l="9523" t="5558" r="10118" b="5712">among</wd>

<space/>

<wd l="10214" t="5510" r="10483" b="5669">the</wd>

<space/>

</ln>

<ln l="6130" t="5765" r="10493" b="5966" baseLine="5914" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="5765" r="6600" b="5923">OOV</wd>

<space/>

<wd l="6725" t="5765" r="7310" b="5923">words.</wd>

<space/>

<wd l="7450" t="5765" r="7901" b="5966">They</wd>

<space/>

<wd l="8026" t="5765" r="8448" b="5923">need</wd>

<space/>

<wd l="8568" t="5784" r="8736" b="5923">to</wd>

<space/>

<wd l="8861" t="5765" r="9072" b="5923">be</wd>

<space/>

<wd l="9206" t="5765" r="10051" b="5923">identified</wd>

<space/>

<wd l="10176" t="5765" r="10493" b="5923">and</wd>

<space/>

</ln>

<ln l="6120" t="6019" r="9163" b="6221" baseLine="6168" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="6019" r="6946" b="6221">protected</wd>

<space/>

<wd l="7003" t="6019" r="7426" b="6178">from</wd>

<space/>

<wd l="7488" t="6019" r="8179" b="6221">spelling</wd>

<space/>

<wd l="8237" t="6019" r="9163" b="6178">correction.</wd>

</ln>

</para>

<para l="6120" t="6269" r="10488" b="6979" alignment="justified" spaceAfter="7805" fli="216" lsp="exactly" lspExact="253" language="en" id="_1_4_42">

<ln l="6355" t="6269" r="10488" b="6470" baseLine="6422" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6355" t="6269" r="6696" b="6427">The</wd>

<space/>

<wd l="6744" t="6269" r="7560" b="6470">proposed</wd>

<space/>

<wd l="7608" t="6269" r="8318" b="6470">pipeline</wd>

<space/>

<wd l="8376" t="6269" r="9408" b="6427">architecture</wd>

<space/>

<wd l="9470" t="6269" r="9672" b="6427">of</wd>

<space/>

<wd l="9701" t="6269" r="9970" b="6427">the</wd>

<space/>

<wd l="10027" t="6269" r="10488" b="6427">UGC</wd>

<space/>

</ln>

<ln l="6120" t="6523" r="10488" b="6725" baseLine="6677" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6120" t="6523" r="7128" b="6682">Normalizer</wd>

<space/>

<wd l="7229" t="6523" r="7637" b="6682">Tool</wd>

<space/>

<wd l="7747" t="6523" r="9019" b="6725">(UCGNormal)</wd>

<space/>

<wd l="9134" t="6523" r="9269" b="6682">is</wd>

<space/>

<wd l="9370" t="6523" r="10224" b="6725">presented</wd>

<space/>

<wd l="10320" t="6523" r="10488" b="6677">in</wd>

<space/>

</ln>

<ln l="6125" t="6778" r="6682" b="6979" baseLine="6926" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6125" t="6778" r="6461" b="6979">Fig.</wd>

<space/>

<wd l="6552" t="6778" r="6682" b="6936">1.</wd>

</ln>

</para>

<para l="6614" t="14832" r="9989" b="15034" alignment="centered" spaceAfter="177" lsp="exactly" lspExact="253" language="en">

<ln l="6614" t="14832" r="9989" b="15034" baseLine="14986" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6614" t="14832" r="7186" b="15034">Figure</wd>

<space/>

<wd l="7267" t="14832" r="7397" b="14990">1:</wd>

<space/>

<wd l="7469" t="14832" r="8568" b="14990">Architecture</wd>

<space/>

<wd l="8630" t="14832" r="8832" b="14990">of</wd>

<space/>

<wd l="8866" t="14832" r="9989" b="14990">UGCNormal</wd>

</ln>

</para>

</column>

</section>

<dd l="6686" t="6809" r="10524" b="14711" anchorTo="toPage" refTo="_1_4_42">

<picture l="6994" t="6950" r="9614" b="14592" alignment="left" li="308" ri="910" spaceBefore="141" spaceAfter="119">

</picture>

</dd>

<dd l="1402" t="15746" r="10524" b="15975">

<para l="5800" t="15787" r="6128" b="15941" alignment="centered" lsp="exactly" lspExact="223" language="en">

<ln l="5866" t="15787" r="6062" b="15941" baseLine="15936" underlined="none" subsuperscript="none" fontSize="950" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="40">

<wd l="5866" t="15792" r="6062" b="15941">41</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4305.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1402" marginTop="1417" marginRight="1385" marginBottom="1302" offsetX="-8" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1402" t="1417" r="10524" b="15430">

<column l="1402" t="1417" r="5813" b="15430">

<para l="1416" t="1464" r="5789" b="3931" alignment="justified" spaceBefore="4" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="1651" t="1464" r="5779" b="1666" baseLine="1613" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1651" t="1464" r="1987" b="1622">The</wd>

<space/>

<wd l="2083" t="1464" r="2530" b="1666">input</wd>

<space/>

<wd l="2621" t="1464" r="2760" b="1622">is</wd>

<space/>

<wd l="2856" t="1512" r="2952" b="1622">a</wd>

<space/>

<wd l="3038" t="1464" r="3499" b="1622">UGC</wd>

<space/>

<wd l="3590" t="1483" r="3922" b="1622">text</wd>

<space/>

<wd l="4008" t="1464" r="4632" b="1622">written</wd>

<space/>

<wd l="4723" t="1464" r="4891" b="1618">in</wd>

<space/>

<wd l="4978" t="1464" r="5779" b="1622">Brazilian</wd>

<space/>

</ln>

<ln l="1421" t="1718" r="5779" b="1920" baseLine="1867" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="1723" r="2438" b="1920">Portuguese.</wd>

<space/>

<wd l="2510" t="1718" r="2846" b="1877">The</wd>

<space/>

<wd l="2904" t="1718" r="3254" b="1877">first</wd>

<space/>

<wd l="3317" t="1738" r="3658" b="1920">step</wd>

<space/>

<wd l="3720" t="1718" r="4411" b="1877">consists</wd>

<space/>

<wd l="4469" t="1718" r="4637" b="1872">in</wd>

<space/>

<wd l="4694" t="1718" r="5462" b="1920">applying</wd>

<space/>

<wd l="5510" t="1718" r="5779" b="1877">the</wd>

<space/>

</ln>

<ln l="1430" t="1968" r="5779" b="2170" baseLine="2122" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="1987" r="2174" b="2126">sentence</wd>

<space/>

<wd l="2266" t="1968" r="3427" b="2170">segmentation</wd>

<space/>

<wd l="3504" t="1968" r="3845" b="2126">tool</wd>

<space/>

<wd l="3922" t="1968" r="4742" b="2170">proposed</wd>

<space/>

<wd l="4819" t="1968" r="4987" b="2122">in</wd>

<space/>

<wd l="5074" t="1968" r="5779" b="2126">Condori</wd>

<space/>

</ln>

<ln l="1426" t="2222" r="5779" b="2424" baseLine="2376" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="2222" r="1742" b="2381">and</wd>

<space/>

<wd l="1819" t="2222" r="2328" b="2381">Pardo</wd>

<space/>

<wd l="2419" t="2222" r="3048" b="2424">(2015),</wd>

<space/>

<wd l="3139" t="2222" r="3677" b="2381">which</wd>

<space/>

<wd l="3758" t="2222" r="3898" b="2381">is</wd>

<space/>

<wd l="3989" t="2270" r="4085" b="2381">a</wd>

<space/>

<wd l="4162" t="2222" r="4906" b="2381">machine</wd>

<space/>

<wd l="4992" t="2222" r="5779" b="2424">learning-</wd>

</ln>

<ln l="1416" t="2477" r="5779" b="2678" baseLine="2626" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="2477" r="1925" b="2635">based</wd>

<space/>

<wd l="2016" t="2496" r="2621" b="2678">system</wd>

<space/>

<wd l="2702" t="2477" r="3322" b="2635">trained</wd>

<space/>

<wd l="3408" t="2477" r="3576" b="2630">in</wd>

<space/>

<wd l="3667" t="2525" r="3763" b="2635">a</wd>

<space/>

<wd l="3826" t="2477" r="4824" b="2678">journalistic</wd>

<space/>

<wd l="4925" t="2525" r="5549" b="2678">corpus.</wd>

<space/>

<wd l="5654" t="2482" r="5779" b="2635">It</wd>

<space/>

</ln>

<ln l="1426" t="2726" r="5789" b="2928" baseLine="2880" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="2726" r="1987" b="2885">allows</wd>

<space/>

<wd l="2045" t="2774" r="2237" b="2885">us</wd>

<space/>

<wd l="2294" t="2746" r="2462" b="2885">to</wd>

<space/>

<wd l="2520" t="2726" r="3005" b="2885">insert</wd>

<space/>

<wd l="3053" t="2726" r="3701" b="2928">periods</wd>

<space/>

<wd l="3758" t="2726" r="4296" b="2885">where</wd>

<space/>

<wd l="4349" t="2726" r="4733" b="2928">they</wd>

<space/>

<wd l="4786" t="2774" r="5050" b="2885">are</wd>

<space/>

<wd l="5102" t="2726" r="5789" b="2928">missing</wd>

<space/>

</ln>

<ln l="1426" t="2981" r="5774" b="3182" baseLine="3134" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="2981" r="1786" b="3173">and,</wd>

<space/>

<wd l="1867" t="2981" r="3067" b="3182">consequently,</wd>

<space/>

<wd l="3144" t="3000" r="3312" b="3139">to</wd>

<space/>

<wd l="3384" t="2981" r="4133" b="3182">properly</wd>

<space/>

<wd l="4205" t="3000" r="4862" b="3139">convert</wd>

<space/>

<wd l="4930" t="2981" r="5198" b="3139">the</wd>

<space/>

<wd l="5270" t="2981" r="5774" b="3139">initial</wd>

<space/>

</ln>

<ln l="1421" t="3235" r="5779" b="3437" baseLine="3384" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="3235" r="1954" b="3394">words</wd>

<space/>

<wd l="2107" t="3254" r="2275" b="3394">to</wd>

<space/>

<wd l="2434" t="3283" r="3355" b="3437">uppercase.</wd>

<space/>

<wd l="3518" t="3235" r="4042" b="3394">When</wd>

<space/>

<wd l="4200" t="3235" r="5045" b="3394">evaluated</wd>

<space/>

<wd l="5198" t="3235" r="5366" b="3389">in</wd>

<space/>

<wd l="5515" t="3235" r="5779" b="3394">the</wd>

<space/>

</ln>

<ln l="1421" t="3490" r="5770" b="3691" baseLine="3638" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="3490" r="2165" b="3691">Buscapé</wd>

<space/>

<wd l="2246" t="3538" r="2875" b="3691">corpus,</wd>

<space/>

<wd l="2962" t="3490" r="3082" b="3648">it</wd>

<space/>

<wd l="3158" t="3490" r="3941" b="3648">achieved</wd>

<space/>

<wd l="4018" t="3490" r="4498" b="3648">0.953</wd>

<space/>

<wd l="4589" t="3490" r="4843" b="3648">for</wd>

<space/>

<wd l="4910" t="3490" r="5770" b="3691">precision;</wd>

<space/>

</ln>

<ln l="1426" t="3739" r="5117" b="3931" baseLine="3893" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="3739" r="1901" b="3898">0.895</wd>

<space/>

<wd l="1978" t="3739" r="2227" b="3898">for</wd>

<space/>

<wd l="2280" t="3739" r="2818" b="3931">recall;</wd>

<space/>

<wd l="2890" t="3739" r="3206" b="3898">and</wd>

<space/>

<wd l="3264" t="3739" r="3730" b="3898">0.921</wd>

<space/>

<wd l="3811" t="3739" r="4066" b="3898">for</wd>

<space/>

<wd l="4118" t="3744" r="5117" b="3898">F-Measure.</wd>

</ln>

</para>

<para l="1421" t="3994" r="5789" b="6725" alignment="justified" spaceBefore="1" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="1656" t="3994" r="5779" b="4195" baseLine="4142" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1656" t="3994" r="2880" b="4195">Subsequently,</wd>

<space/>

<wd l="3038" t="3994" r="3307" b="4152">the</wd>

<space/>

<wd l="3470" t="4013" r="4301" b="4152">sentences</wd>

<space/>

<wd l="4459" t="4042" r="4723" b="4152">are</wd>

<space/>

<wd l="4877" t="3994" r="5779" b="4186">tokenized,</wd>

<space/>

</ln>

<ln l="1430" t="4248" r="5779" b="4450" baseLine="4397" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="4248" r="2434" b="4450">specifically</wd>

<space/>

<wd l="2549" t="4248" r="3509" b="4450">accounting</wd>

<space/>

<wd l="3624" t="4248" r="3878" b="4406">for</wd>

<space/>

<wd l="3989" t="4248" r="4258" b="4406">the</wd>

<space/>

<wd l="4368" t="4267" r="4915" b="4406">nature</wd>

<space/>

<wd l="5030" t="4248" r="5232" b="4406">of</wd>

<space/>

<wd l="5318" t="4248" r="5779" b="4406">UGC</wd>

<space/>

</ln>

<ln l="1421" t="4498" r="5789" b="4699" baseLine="4651" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="4517" r="1877" b="4656">texts.</wd>

<space/>

<wd l="2016" t="4498" r="2746" b="4699">Usually,</wd>

<space/>

<wd l="2880" t="4498" r="3778" b="4656">tokenizers</wd>

<space/>

<wd l="3912" t="4498" r="4656" b="4656">consider</wd>

<space/>

<wd l="4786" t="4498" r="5170" b="4699">only</wd>

<space/>

<wd l="5290" t="4498" r="5789" b="4656">blank</wd>

<space/>

</ln>

<ln l="1430" t="4752" r="5770" b="4954" baseLine="4906" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="4800" r="2040" b="4954">spaces,</wd>

<space/>

<wd l="2165" t="4752" r="3254" b="4954">punctuation,</wd>

<space/>

<wd l="3384" t="4752" r="3706" b="4910">and</wd>

<space/>

<wd l="3821" t="4752" r="4147" b="4910">few</wd>

<space/>

<wd l="4277" t="4752" r="4872" b="4954">special</wd>

<space/>

<wd l="5006" t="4752" r="5770" b="4954">symbols.</wd>

<space/>

</ln>

<ln l="1421" t="5006" r="5784" b="5208" baseLine="5155" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="5011" r="2270" b="5198">However,</wd>

<space/>

<wd l="2352" t="5006" r="2832" b="5165">when</wd>

<space/>

<wd l="2899" t="5006" r="3845" b="5208">processing</wd>

<space/>

<wd l="3917" t="5006" r="4430" b="5198">UGC,</wd>

<space/>

<wd l="4517" t="5006" r="4637" b="5165">it</wd>

<space/>

<wd l="4714" t="5006" r="4853" b="5165">is</wd>

<space/>

<wd l="4925" t="5054" r="5784" b="5208">necessary</wd>

<space/>

</ln>

<ln l="1421" t="5261" r="5784" b="5462" baseLine="5410" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="5280" r="1584" b="5419">to</wd>

<space/>

<wd l="1733" t="5261" r="2477" b="5419">consider</wd>

<space/>

<wd l="2611" t="5261" r="2880" b="5419">the</wd>

<space/>

<wd l="3019" t="5309" r="3979" b="5419">occurrence</wd>

<space/>

<wd l="4123" t="5261" r="4325" b="5419">of</wd>

<space/>

<wd l="4440" t="5309" r="4886" b="5419">more</wd>

<space/>

<wd l="5030" t="5261" r="5784" b="5462">complex</wd>

<space/>

</ln>

<ln l="1421" t="5510" r="5779" b="5712" baseLine="5664">

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1421" t="5510" r="2040" b="5702">tokens,</wd>

<space/>

<wd l="2122" t="5510" r="2443" b="5669">like</wd>

<space/>

<wd l="2520" t="5510" r="3418" b="5669">emoticons</wd>

<space/>

<wd l="3494" t="5510" r="3562" b="5712">(</wd>

<space/>

<wd l="3653" t="5510" r="3691" b="5573">‘</wd>

<space/>

<wd l="3792" t="5510" r="3902" b="5712">:)</wd>

<space/>

<wd l="3998" t="5510" r="4104" b="5702">’,</wd>

<space/>

<wd l="4200" t="5510" r="4238" b="5573">‘</wd>

<space/>

</run>

<wd l="4334" t="5510" r="4526" b="5712"><run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">:</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">-</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">)</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="4622" t="5510" r="4723" b="5702">’,</wd>

<space/>

<wd l="4819" t="5510" r="4858" b="5573">‘</wd>

<space/>

<wd l="4958" t="5510" r="5078" b="5712">:(</wd>

<space/>

<wd l="5165" t="5510" r="5270" b="5702">‘,</wd>

<space/>

<wd l="5352" t="5510" r="5779" b="5712">etc.),</wd>

<space/>

</run>

</ln>

<ln l="1421" t="5765" r="5779" b="5966" baseLine="5914">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1421" t="5765" r="1843" b="5923">units</wd>

<space/>

<wd l="1954" t="5765" r="2150" b="5923">of</wd>

<space/>

<wd l="2232" t="5784" r="3403" b="5923">measurement</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="3504" t="5765" r="4186" b="5966">(‘1GB’,</wd>

<space/>

<wd l="4310" t="5765" r="5088" b="5957">‘100Kb’,</wd>

<space/>

</run>

<wd l="5213" t="5765" r="5779" b="5957"><run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">‘2mb’</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

</run>

</ln>

<ln l="1426" t="6019" r="5789" b="6221" baseLine="6168">

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1426" t="6019" r="1853" b="6221">etc.),</wd>

<space/>

<wd l="1910" t="6019" r="2232" b="6178">and</wd>

<space/>

<wd l="2275" t="6019" r="2914" b="6178">URL’s.</wd>

<space/>

<wd l="2981" t="6024" r="3154" b="6173">In</wd>

<space/>

<wd l="3206" t="6019" r="3672" b="6178">order</wd>

<space/>

<wd l="3720" t="6038" r="3883" b="6178">to</wd>

<space/>

<wd l="3931" t="6019" r="4685" b="6221">properly</wd>

<space/>

<wd l="4738" t="6019" r="5419" b="6221">identify</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="5472" t="6019" r="5789" b="6178">and</wd>

<space/>

</run>

</ln>

<ln l="1430" t="6269" r="5789" b="6470" baseLine="6422" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="6269" r="1800" b="6470">split</wd>

<space/>

<wd l="1925" t="6269" r="2496" b="6427">tokens</wd>

<space/>

<wd l="2630" t="6269" r="2952" b="6427">like</wd>

<space/>

<wd l="3082" t="6269" r="3595" b="6461">those,</wd>

<space/>

<wd l="3730" t="6317" r="3984" b="6427">we</wd>

<space/>

<wd l="4114" t="6269" r="4526" b="6427">have</wd>

<space/>

<wd l="4661" t="6269" r="5563" b="6470">developed</wd>

<space/>

<wd l="5693" t="6317" r="5789" b="6427">a</wd>

<space/>

</ln>

<ln l="1421" t="6523" r="5650" b="6725" baseLine="6677" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="6523" r="2242" b="6682">tokenizer</wd>

<space/>

<wd l="2294" t="6523" r="2770" b="6725">using</wd>

<space/>

<wd l="2827" t="6523" r="3763" b="6682">GNU-Flex</wd>

<space/>

<wd l="3826" t="6523" r="4402" b="6682">lexical</wd>

<space/>

<wd l="4469" t="6523" r="5208" b="6725">analyzer</wd>

<space/>

<wd l="5261" t="6523" r="5650" b="6682">tool.</wd>

</ln>

</para>

<para l="1421" t="6778" r="5789" b="9715" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="1651" t="6778" r="5789" b="6979" baseLine="6926" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1651" t="6778" r="1987" b="6936">The</wd>

<space/>

<wd l="2040" t="6778" r="3264" b="6936">lexicon-based</wd>

<space/>

<wd l="3317" t="6778" r="4570" b="6979">Spell-Checker</wd>

<space/>

<wd l="4618" t="6778" r="5525" b="6979">developed</wd>

<space/>

<wd l="5563" t="6778" r="5789" b="6979">by</wd>

<space/>

</ln>

<ln l="1421" t="7027" r="5784" b="7229" baseLine="7181" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="7027" r="2098" b="7224">Avanço</wd>

<space/>

<wd l="2189" t="7046" r="2342" b="7186">et</wd>

<space/>

<wd l="2429" t="7027" r="2630" b="7186">al.</wd>

<space/>

<wd l="2726" t="7027" r="3302" b="7229">(2014)</wd>

<space/>

<wd l="3398" t="7027" r="3792" b="7186">does</wd>

<space/>

<wd l="3874" t="7027" r="4142" b="7186">the</wd>

<space/>

<wd l="4229" t="7027" r="4747" b="7229">major</wd>

<space/>

<wd l="4819" t="7046" r="5165" b="7229">part</wd>

<space/>

<wd l="5251" t="7027" r="5453" b="7186">of</wd>

<space/>

<wd l="5515" t="7027" r="5784" b="7186">the</wd>

<space/>

</ln>

<ln l="1421" t="7282" r="5789" b="7483" baseLine="7435" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="7282" r="2640" b="7440">normalization</wd>

<space/>

<wd l="2688" t="7330" r="3398" b="7483">process.</wd>

<space/>

<wd l="3466" t="7286" r="3595" b="7440">It</wd>

<space/>

<wd l="3648" t="7330" r="3989" b="7440">was</wd>

<space/>

<wd l="4056" t="7282" r="4829" b="7483">specially</wd>

<space/>

<wd l="4886" t="7282" r="5789" b="7483">developed</wd>

<space/>

</ln>

<ln l="1421" t="7536" r="5770" b="7738" baseLine="7685" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="7555" r="1584" b="7694">to</wd>

<space/>

<wd l="1642" t="7536" r="2165" b="7694">tackle</wd>

<space/>

<wd l="2213" t="7536" r="4262" b="7738">phonetically-motivated</wd>

<space/>

<wd l="4306" t="7536" r="5458" b="7738">misspellings,</wd>

<space/>

<wd l="5520" t="7536" r="5770" b="7694">i.e.</wd>

<space/>

</ln>

<ln l="1421" t="7790" r="5784" b="7992" baseLine="7939" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="7790" r="1954" b="7949">words</wd>

<space/>

<wd l="2050" t="7790" r="2674" b="7949">written</wd>

<space/>

<wd l="2770" t="7838" r="2947" b="7949">as</wd>

<space/>

<wd l="3043" t="7790" r="3422" b="7992">they</wd>

<space/>

<wd l="3518" t="7838" r="3782" b="7949">are</wd>

<space/>

<wd l="3869" t="7790" r="4958" b="7992">pronounced.</wd>

<space/>

<wd l="5059" t="7790" r="5784" b="7949">Another</wd>

<space/>

</ln>

<ln l="1426" t="8040" r="5779" b="8242" baseLine="8194" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="8040" r="2275" b="8242">important</wd>

<space/>

<wd l="2419" t="8040" r="3581" b="8198">characteristic</wd>

<space/>

<wd l="3730" t="8040" r="3931" b="8198">of</wd>

<space/>

<wd l="4046" t="8040" r="4358" b="8198">this</wd>

<space/>

<wd l="4512" t="8040" r="5093" b="8242">speller</wd>

<space/>

<wd l="5232" t="8040" r="5371" b="8198">is</wd>

<space/>

<wd l="5515" t="8040" r="5779" b="8198">the</wd>

<space/>

</ln>

<ln l="1426" t="8294" r="5779" b="8496" baseLine="8443" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="8294" r="2280" b="8453">automatic</wd>

<space/>

<wd l="2386" t="8294" r="3322" b="8486">correction,</wd>

<space/>

<wd l="3427" t="8342" r="3605" b="8453">as</wd>

<space/>

<wd l="3706" t="8294" r="3826" b="8453">it</wd>

<space/>

<wd l="3922" t="8294" r="4320" b="8453">does</wd>

<space/>

<wd l="4416" t="8314" r="4699" b="8453">not</wd>

<space/>

<wd l="4790" t="8342" r="5779" b="8496">presuppose</wd>

<space/>

</ln>

<ln l="1421" t="8549" r="5784" b="8750" baseLine="8698" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="8597" r="1790" b="8707">user</wd>

<space/>

<wd l="1858" t="8549" r="2837" b="8707">interaction.</wd>

<space/>

<wd l="2918" t="8549" r="3826" b="8741">Therefore,</wd>

<space/>

<wd l="3907" t="8549" r="4531" b="8707">instead</wd>

<space/>

<wd l="4598" t="8549" r="4800" b="8707">of</wd>

<space/>

<wd l="4853" t="8549" r="5784" b="8750">suggesting</wd>

<space/>

</ln>

<ln l="1430" t="8798" r="5784" b="9000" baseLine="8952" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="8846" r="1882" b="8957">some</wd>

<space/>

<wd l="1978" t="8798" r="2899" b="8957">candidates</wd>

<space/>

<wd l="2995" t="8798" r="3250" b="8957">for</wd>

<space/>

<wd l="3341" t="8798" r="4277" b="8990">correction,</wd>

<space/>

<wd l="4378" t="8798" r="4498" b="8957">it</wd>

<space/>

<wd l="4594" t="8798" r="5784" b="9000">automatically</wd>

<space/>

</ln>

<ln l="1421" t="9053" r="5789" b="9254" baseLine="9206" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="9053" r="2136" b="9254">replaces</wd>

<space/>

<wd l="2189" t="9053" r="2458" b="9211">the</wd>

<space/>

<wd l="2506" t="9053" r="3456" b="9254">misspelled</wd>

<space/>

<wd l="3499" t="9053" r="3955" b="9211">word</wd>

<space/>

<wd l="4003" t="9053" r="4392" b="9211">with</wd>

<space/>

<wd l="4440" t="9053" r="4709" b="9211">the</wd>

<space/>

<wd l="4757" t="9053" r="5789" b="9211">best-ranked</wd>

<space/>

</ln>

<ln l="1426" t="9307" r="5779" b="9509" baseLine="9456" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="9307" r="2304" b="9466">candidate.</wd>

<space/>

<wd l="2386" t="9312" r="2558" b="9461">In</wd>

<space/>

<wd l="2630" t="9307" r="3029" b="9466">such</wd>

<space/>

<wd l="3091" t="9355" r="3187" b="9466">a</wd>

<space/>

<wd l="3259" t="9307" r="4027" b="9499">scenario,</wd>

<space/>

<wd l="4099" t="9307" r="4363" b="9466">the</wd>

<space/>

<wd l="4435" t="9355" r="5213" b="9509">accuracy</wd>

<space/>

<wd l="5275" t="9307" r="5477" b="9466">of</wd>

<space/>

<wd l="5515" t="9307" r="5779" b="9466">the</wd>

<space/>

</ln>

<ln l="1426" t="9557" r="3120" b="9715" baseLine="9710" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="9557" r="1776" b="9715">first</wd>

<space/>

<wd l="1829" t="9557" r="2059" b="9715">hit</wd>

<space/>

<wd l="2122" t="9557" r="2261" b="9715">is</wd>

<space/>

<wd l="2323" t="9557" r="3120" b="9715">essential.</wd>

</ln>

</para>

<para l="1421" t="9811" r="5784" b="12038" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="1651" t="9811" r="5779" b="10013" baseLine="9965" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="9816" r="1829" b="9965">In</wd>

<space/>

<wd l="2035" t="9811" r="2520" b="10003">short,</wd>

<space/>

<wd l="2726" t="9811" r="2995" b="9970">the</wd>

<space/>

<wd l="3206" t="9811" r="4056" b="10013">algorithm</wd>

<space/>

<wd l="4258" t="9811" r="4949" b="9970">consists</wd>

<space/>

<wd l="5160" t="9811" r="5362" b="9970">of</wd>

<space/>

<wd l="5549" t="9811" r="5779" b="10013">(a)</wd>

<space/>

</ln>

<ln l="1426" t="10066" r="5784" b="10267" baseLine="10214" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="10066" r="2386" b="10267">identifying</wd>

<space/>

<wd l="2472" t="10066" r="3206" b="10267">misspelt</wd>

<space/>

<wd l="3293" t="10066" r="3878" b="10258">words,</wd>

<space/>

<wd l="3970" t="10066" r="4450" b="10267">using</wd>

<space/>

<wd l="4536" t="10066" r="4805" b="10224">the</wd>

<space/>

<wd l="4891" t="10070" r="5784" b="10224">UNITEX-</wd>

</ln>

<ln l="1421" t="10296" r="5779" b="10522" baseLine="10465">

<wd l="1421" t="10296" r="1752" b="10474"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">PB</run>

<run underlined="none" subsuperscript="superscript" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">3</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="1886" t="10320" r="2578" b="10512">lexicon;</wd>

<space/>

<wd l="2712" t="10320" r="2894" b="10522">b)</wd>

<space/>

<wd l="3029" t="10320" r="3950" b="10522">generating</wd>

<space/>

<wd l="4080" t="10320" r="5002" b="10478">candidates</wd>

<space/>

<wd l="5136" t="10320" r="5390" b="10478">for</wd>

<space/>

<wd l="5510" t="10320" r="5779" b="10478">the</wd>

<space/>

</run>

</ln>

<ln l="1430" t="10570" r="5784" b="10771" baseLine="10723" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="10570" r="2261" b="10728">substitute</wd>

<space/>

<wd l="2438" t="10570" r="2894" b="10728">word</wd>

<space/>

<wd l="3062" t="10570" r="3288" b="10771">by</wd>

<space/>

<wd l="3461" t="10570" r="3936" b="10771">using</wd>

<space/>

<wd l="4109" t="10570" r="4378" b="10728">the</wd>

<space/>

<wd l="4560" t="10570" r="4886" b="10728">edit</wd>

<space/>

<wd l="5074" t="10570" r="5784" b="10728">distance</wd>

<space/>

</ln>

<ln l="1426" t="10824" r="5784" b="11026" baseLine="10978" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="10824" r="2616" b="11026">(Levenshtein,</wd>

<space/>

<wd l="2688" t="10824" r="3226" b="11026">1966);</wd>

<space/>

<wd l="3288" t="10824" r="3518" b="11026">(c)</wd>

<space/>

<wd l="3566" t="10824" r="4238" b="11026">ranking</wd>

<space/>

<wd l="4277" t="10824" r="4546" b="10982">the</wd>

<space/>

<wd l="4598" t="10824" r="5515" b="10982">candidates</wd>

<space/>

<wd l="5558" t="10824" r="5784" b="11026">by</wd>

<space/>

</ln>

<ln l="1426" t="11078" r="5770" b="11280" baseLine="11227" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="11078" r="2448" b="11280">considering</wd>

<space/>

<wd l="2515" t="11078" r="3677" b="11280">corpus-based</wd>

<space/>

<wd l="3744" t="11078" r="4618" b="11280">frequency</wd>

<space/>

<wd l="4690" t="11078" r="5770" b="11270">information;</wd>

<space/>

</ln>

<ln l="1426" t="11328" r="5784" b="11530" baseLine="11482" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="11328" r="1670" b="11530">(d)</wd>

<space/>

<wd l="1805" t="11328" r="2472" b="11530">looking</wd>

<space/>

<wd l="2602" t="11328" r="2856" b="11486">for</wd>

<space/>

<wd l="2976" t="11328" r="3730" b="11530">phonetic</wd>

<space/>

<wd l="3869" t="11328" r="4834" b="11486">similarities</wd>

<space/>

<wd l="4958" t="11328" r="5184" b="11530">by</wd>

<space/>

<wd l="5304" t="11328" r="5784" b="11530">using</wd>

<space/>

</ln>

<ln l="1430" t="11582" r="5784" b="11784" baseLine="11736" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="11582" r="2040" b="11741">several</wd>

<space/>

<wd l="2122" t="11582" r="2794" b="11784">specific</wd>

<space/>

<wd l="2870" t="11582" r="3293" b="11741">rules</wd>

<space/>

<wd l="3374" t="11582" r="3629" b="11741">for</wd>

<space/>

<wd l="3696" t="11587" r="4670" b="11784">Portuguese</wd>

<space/>

<wd l="4752" t="11582" r="5069" b="11741">and</wd>

<space/>

<wd l="5136" t="11582" r="5616" b="11784">using</wd>

<space/>

<wd l="5688" t="11630" r="5784" b="11741">a</wd>

<space/>

</ln>

<ln l="1421" t="11813" r="4608" b="12038" baseLine="11981">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1421" t="11837" r="2203" b="11995">variation</wd>

<space/>

<wd l="2261" t="11837" r="2462" b="11995">of</wd>

<space/>

<wd l="2491" t="11837" r="2760" b="11995">the</wd>

<space/>

</run>

<wd l="2827" t="11813" r="3653" b="11995"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Soundex</run>

<run underlined="none" subsuperscript="superscript" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">4</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="3715" t="11837" r="4608" b="12038">algorithm.</wd>

</run>

</ln>

</para>

<para l="1421" t="12091" r="5789" b="13810" alignment="justified" spaceBefore="3" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="1646" t="12091" r="5789" b="12293" baseLine="12240" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="12096" r="1958" b="12250">For</wd>

<space/>

<wd l="2299" t="12091" r="3485" b="12283">UGCNormal,</wd>

<space/>

<wd l="3840" t="12139" r="4094" b="12250">we</wd>

<space/>

<wd l="4450" t="12091" r="4920" b="12250">made</wd>

<space/>

<wd l="5270" t="12091" r="5789" b="12293">major</wd>

<space/>

</ln>

<ln l="1426" t="12341" r="5784" b="12542" baseLine="12494" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="12341" r="2674" b="12542">improvements</wd>

<space/>

<wd l="2789" t="12360" r="2957" b="12499">to</wd>

<space/>

<wd l="3072" t="12341" r="3341" b="12499">the</wd>

<space/>

<wd l="3461" t="12341" r="4138" b="12542">original</wd>

<space/>

<wd l="4258" t="12341" r="5112" b="12542">algorithm</wd>

<space/>

<wd l="5227" t="12341" r="5429" b="12499">of</wd>

<space/>

<wd l="5520" t="12341" r="5784" b="12499">the</wd>

<space/>

</ln>

<ln l="1430" t="12595" r="5774" b="12797" baseLine="12744" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="12595" r="2054" b="12797">speller,</wd>

<space/>

<wd l="2117" t="12643" r="2294" b="12754">as</wd>

<space/>

<wd l="2352" t="12595" r="2726" b="12754">well</wd>

<space/>

<wd l="2784" t="12643" r="2962" b="12754">as</wd>

<space/>

<wd l="3024" t="12595" r="3778" b="12797">adapting</wd>

<space/>

<wd l="3830" t="12595" r="3950" b="12754">it</wd>

<space/>

<wd l="4003" t="12614" r="4171" b="12754">to</wd>

<space/>

<wd l="4234" t="12595" r="4426" b="12754">fit</wd>

<space/>

<wd l="4483" t="12595" r="4651" b="12749">in</wd>

<space/>

<wd l="4704" t="12595" r="4968" b="12754">the</wd>

<space/>

<wd l="5021" t="12595" r="5774" b="12797">pipeline.</wd>

<space/>

</ln>

<ln l="1421" t="12850" r="5784" b="13051" baseLine="12998" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="12850" r="1656" b="13008">As</wd>

<space/>

<wd l="1723" t="12898" r="2213" b="13051">many</wd>

<space/>

<wd l="2275" t="12850" r="3370" b="13051">misspellings</wd>

<space/>

<wd l="3442" t="12898" r="3706" b="13008">are</wd>

<space/>

<wd l="3768" t="12850" r="4373" b="13008">related</wd>

<space/>

<wd l="4426" t="12869" r="4594" b="13008">to</wd>

<space/>

<wd l="4661" t="12850" r="4925" b="13008">the</wd>

<space/>

<wd l="4992" t="12850" r="5784" b="13008">omission</wd>

<space/>

</ln>

<ln l="1426" t="13099" r="5779" b="13291" baseLine="13253" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="13099" r="1627" b="13258">of</wd>

<space/>

<wd l="1742" t="13099" r="2539" b="13258">diacritics</wd>

<space/>

<wd l="2683" t="13099" r="3005" b="13258">and</wd>

<space/>

<wd l="3139" t="13099" r="3725" b="13258">cedilla</wd>

<space/>

<wd l="3854" t="13099" r="4358" b="13258">under</wd>

<space/>

<wd l="4498" t="13099" r="4829" b="13291">“c”,</wd>

<space/>

<wd l="4973" t="13147" r="5227" b="13258">we</wd>

<space/>

<wd l="5366" t="13099" r="5779" b="13258">have</wd>

<space/>

</ln>

<ln l="1426" t="13354" r="5789" b="13555" baseLine="13507" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="13354" r="2534" b="13555">incorporated</wd>

<space/>

<wd l="2616" t="13402" r="3067" b="13512">some</wd>

<space/>

<wd l="3144" t="13354" r="3984" b="13512">heuristics</wd>

<space/>

<wd l="4066" t="13373" r="4234" b="13512">to</wd>

<space/>

<wd l="4320" t="13373" r="4925" b="13512">correct</wd>

<space/>

<wd l="4997" t="13354" r="5309" b="13512">this</wd>

<space/>

<wd l="5390" t="13354" r="5789" b="13512">kind</wd>

<space/>

</ln>

<ln l="1426" t="13608" r="5275" b="13810" baseLine="13757" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="13608" r="1627" b="13766">of</wd>

<space/>

<wd l="1661" t="13656" r="2088" b="13766">error</wd>

<space/>

<wd l="2136" t="13608" r="2698" b="13766">before</wd>

<space/>

<wd l="2755" t="13608" r="3024" b="13766">the</wd>

<space/>

<wd l="3086" t="13608" r="4008" b="13810">generation</wd>

<space/>

<wd l="4066" t="13608" r="4267" b="13766">of</wd>

<space/>

<wd l="4306" t="13608" r="5275" b="13766">candidates.</wd>

</ln>

</para>

<para l="1426" t="13858" r="5789" b="14568" alignment="justified" spaceAfter="302" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="1646" t="13858" r="5789" b="14016" baseLine="14011" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="13858" r="1886" b="14016">As</wd>

<space/>

<wd l="1958" t="13858" r="2227" b="14016">the</wd>

<space/>

<wd l="2299" t="13858" r="3187" b="14016">correction</wd>

<space/>

<wd l="3264" t="13858" r="3461" b="14016">of</wd>

<space/>

<wd l="3509" t="13858" r="4368" b="14016">real-word</wd>

<space/>

<wd l="4435" t="13906" r="4939" b="14016">errors</wd>

<space/>

<wd l="5016" t="13858" r="5155" b="14016">is</wd>

<space/>

<wd l="5232" t="13906" r="5328" b="14016">a</wd>

<space/>

<wd l="5395" t="13858" r="5789" b="14016">hard</wd>

<space/>

</ln>

<ln l="1426" t="14112" r="5789" b="14314" baseLine="14266" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="14112" r="3043" b="14314">context-dependent</wd>

<space/>

<wd l="3139" t="14112" r="3922" b="14314">problem,</wd>

<space/>

<wd l="4027" t="14112" r="4344" b="14270">this</wd>

<space/>

<wd l="4445" t="14112" r="5789" b="14314">phonetic-based</wd>

<space/>

</ln>

<ln l="1430" t="14366" r="5779" b="14568" baseLine="14515" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="14366" r="2006" b="14568">speller</wd>

<space/>

<wd l="2141" t="14386" r="2722" b="14525">cannot</wd>

<space/>

<wd l="2851" t="14366" r="3437" b="14525">handle</wd>

<space/>

<wd l="3571" t="14366" r="4008" b="14525">them</wd>

<space/>

<wd l="4138" t="14366" r="4560" b="14525">well.</wd>

<space/>

<wd l="4704" t="14371" r="4882" b="14520">In</wd>

<space/>

<wd l="5016" t="14366" r="5482" b="14525">order</wd>

<space/>

<wd l="5611" t="14386" r="5779" b="14525">to</wd>

</ln>

</para>

<rulerline l="1402" t="14899" r="4301" b="14899" type="single" width="19" color="000000"/>

<para l="1416" t="15029" r="5054" b="15422" alignment="left" ri="720" spaceBefore="125" lsp="exactly" lspExact="204" language="en">

<ln l="1426" t="15029" r="5054" b="15216" baseLine="15165">

<run underlined="none" subsuperscript="none" fontSize="600" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1426" t="15029" r="1474" b="15115">3</wd>

<space/>

</run>

<wd l="1541" t="15043" r="5054" b="15216" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">http://www.nilc.icmc.usp.br/nilc/projects/unitex-</wd>

</ln>

<ln l="1416" t="15250" r="3168" b="15422" baseLine="15374" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="15250" r="3168" b="15422">pb/web/dicionarios.html</wd>

</ln>

</para>

</column>

<column l="6113" t="1417" r="10524" b="15289">

<para l="6125" t="1464" r="10507" b="4910" alignment="justified" spaceBefore="5" lsp="exactly" lspExact="252" language="en">

<ln l="6130" t="1464" r="10483" b="1666" baseLine="1613" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="1512" r="6989" b="1622">overcome</wd>

<space/>

<wd l="7104" t="1464" r="7421" b="1622">this</wd>

<space/>

<wd l="7541" t="1464" r="8438" b="1656">limitation,</wd>

<space/>

<wd l="8558" t="1512" r="8813" b="1622">we</wd>

<space/>

<wd l="8933" t="1464" r="9581" b="1666">applied</wd>

<space/>

<wd l="9696" t="1512" r="9792" b="1622">a</wd>

<space/>

<wd l="9912" t="1464" r="10483" b="1666">simple</wd>

<space/>

</ln>

<ln l="6134" t="1718" r="10483" b="1920" baseLine="1867" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6134" t="1738" r="6821" b="1920">strategy</wd>

<space/>

<wd l="6888" t="1718" r="7219" b="1877">that</wd>

<space/>

<wd l="7291" t="1718" r="7939" b="1877">enables</wd>

<space/>

<wd l="8011" t="1718" r="8280" b="1877">the</wd>

<space/>

<wd l="8352" t="1718" r="9240" b="1877">correction</wd>

<space/>

<wd l="9312" t="1718" r="9509" b="1877">of</wd>

<space/>

<wd l="9566" t="1766" r="10018" b="1877">some</wd>

<space/>

<wd l="10085" t="1718" r="10483" b="1877">real-</wd>

</ln>

<ln l="6125" t="1968" r="10493" b="2126" baseLine="2122" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6125" t="1968" r="6581" b="2126">word</wd>

<space/>

<wd l="6662" t="2016" r="7166" b="2126">errors</wd>

<space/>

<wd l="7253" t="1968" r="7925" b="2126">without</wd>

<space/>

<wd l="8011" t="1968" r="8923" b="2126">contextual</wd>

<space/>

<wd l="9014" t="1968" r="10090" b="2126">information.</wd>

<space/>

<wd l="10186" t="1973" r="10493" b="2126">For</wd>

<space/>

</ln>

<ln l="6125" t="2222" r="10488" b="2424" baseLine="2376" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6125" t="2222" r="6490" b="2414">this,</wd>

<space/>

<wd l="6562" t="2270" r="6816" b="2381">we</wd>

<space/>

<wd l="6874" t="2222" r="7286" b="2381">have</wd>

<space/>

<wd l="7358" t="2222" r="8218" b="2424">compiled,</wd>

<space/>

<wd l="8290" t="2222" r="8717" b="2381">from</wd>

<space/>

<wd l="8774" t="2222" r="9043" b="2381">the</wd>

<space/>

<wd l="9110" t="2222" r="9754" b="2381">lexicon</wd>

<space/>

<wd l="9816" t="2222" r="10488" b="2381">Unitex-</wd>

</ln>

<ln l="6125" t="2477" r="10493" b="2678" baseLine="2626" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6125" t="2482" r="6442" b="2669">PB,</wd>

<space/>

<wd l="6504" t="2525" r="6600" b="2635">a</wd>

<space/>

<wd l="6648" t="2477" r="6917" b="2635">list</wd>

<space/>

<wd l="6970" t="2477" r="7171" b="2635">of</wd>

<space/>

<wd l="7200" t="2477" r="7795" b="2669">25,722</wd>

<space/>

<wd l="7843" t="2477" r="8275" b="2678">pairs</wd>

<space/>

<wd l="8333" t="2477" r="8534" b="2635">of</wd>

<space/>

<wd l="8558" t="2477" r="9091" b="2635">words</wd>

<space/>

<wd l="9144" t="2477" r="9475" b="2635">that</wd>

<space/>

<wd l="9528" t="2477" r="10018" b="2635">differ</wd>

<space/>

<wd l="10066" t="2477" r="10493" b="2635">from</wd>

<space/>

</ln>

<ln l="6130" t="2726" r="10483" b="2928" baseLine="2880" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="2726" r="6528" b="2885">each</wd>

<space/>

<wd l="6590" t="2726" r="7042" b="2885">other</wd>

<space/>

<wd l="7090" t="2726" r="7315" b="2928">by</wd>

<space/>

<wd l="7373" t="2774" r="7469" b="2885">a</wd>

<space/>

<wd l="7536" t="2726" r="8050" b="2928">single</wd>

<space/>

<wd l="8112" t="2726" r="8870" b="2885">diacritic.</wd>

<space/>

<wd l="8942" t="2731" r="9418" b="2885">From</wd>

<space/>

<wd l="9470" t="2726" r="9787" b="2885">this</wd>

<space/>

<wd l="9850" t="2726" r="10166" b="2918">list,</wd>

<space/>

<wd l="10229" t="2774" r="10483" b="2885">we</wd>

<space/>

</ln>

<ln l="6130" t="2981" r="10488" b="3182" baseLine="3134" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="2981" r="6907" b="3182">analyzed</wd>

<space/>

<wd l="7008" t="2981" r="7277" b="3139">the</wd>

<space/>

<wd l="7378" t="2981" r="7805" b="3182">pairs</wd>

<space/>

<wd l="7910" t="2981" r="8242" b="3139">that</wd>

<space/>

<wd l="8347" t="2981" r="8832" b="3139">differ</wd>

<space/>

<wd l="8933" t="2981" r="9101" b="3134">in</wd>

<space/>

<wd l="9206" t="2981" r="10488" b="3182">morphological</wd>

<space/>

</ln>

<ln l="6125" t="3235" r="10493" b="3437" baseLine="3384" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6125" t="3254" r="6475" b="3437">tags</wd>

<space/>

<wd l="6571" t="3235" r="7258" b="3437">(2,877),</wd>

<space/>

<wd l="7354" t="3235" r="7670" b="3394">and</wd>

<space/>

<wd l="7762" t="3235" r="8467" b="3394">selected</wd>

<space/>

<wd l="8554" t="3235" r="8851" b="3394">561</wd>

<space/>

<wd l="8957" t="3235" r="9384" b="3437">pairs</wd>

<space/>

<wd l="9480" t="3235" r="9682" b="3394">of</wd>

<space/>

<wd l="9749" t="3283" r="9845" b="3394">a</wd>

<space/>

<wd l="9931" t="3235" r="10493" b="3437">highly</wd>

<space/>

</ln>

<ln l="6130" t="3490" r="10474" b="3691" baseLine="3638" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="3490" r="6859" b="3691">frequent</wd>

<space/>

<wd l="6931" t="3490" r="7387" b="3648">word</wd>

<space/>

<wd l="7459" t="3490" r="7781" b="3648">and</wd>

<space/>

<wd l="7853" t="3538" r="7949" b="3648">a</wd>

<space/>

<wd l="8016" t="3490" r="8582" b="3691">highly</wd>

<space/>

<wd l="8654" t="3490" r="9557" b="3691">infrequent</wd>

<space/>

<wd l="9629" t="3490" r="10085" b="3648">word</wd>

<space/>

<wd l="10157" t="3490" r="10474" b="3691">(eg.</wd>

<space/>

</ln>

<ln l="6130" t="3739" r="10507" b="3941" baseLine="3893" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="3739" r="6821" b="3898">“óbvio”</wd>

<space/>

<wd l="6888" t="3739" r="7843" b="3941">(=obvious)</wd>

<space/>

<wd l="7920" t="3739" r="8237" b="3898">and</wd>

<space/>

<wd l="8299" t="3739" r="8986" b="3898">“obvio”</wd>

<space/>

<wd l="9058" t="3739" r="9336" b="3941">(an</wd>

<space/>

<wd l="9403" t="3739" r="10243" b="3898">inflection</wd>

<space/>

<wd l="10306" t="3739" r="10507" b="3898">of</wd>

<space/>

</ln>

<ln l="6130" t="3994" r="10483" b="4195" baseLine="4142" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="3994" r="7176" b="4152">“obviar”=to</wd>

<space/>

<wd l="7282" t="3994" r="8040" b="4195">obviate).</wd>

<space/>

<wd l="8155" t="3994" r="8491" b="4152">The</wd>

<space/>

<wd l="8592" t="3994" r="9494" b="4195">infrequent</wd>

<space/>

<wd l="9595" t="3994" r="10051" b="4152">word</wd>

<space/>

<wd l="10147" t="4042" r="10483" b="4152">was</wd>

<space/>

</ln>

<ln l="6125" t="4248" r="10488" b="4406" baseLine="4397" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6125" t="4248" r="6504" b="4406">then</wd>

<space/>

<wd l="6566" t="4248" r="7358" b="4406">excluded</wd>

<space/>

<wd l="7416" t="4248" r="7843" b="4406">from</wd>

<space/>

<wd l="7896" t="4248" r="8165" b="4406">the</wd>

<space/>

<wd l="8227" t="4248" r="8875" b="4406">lexicon</wd>

<space/>

<wd l="8938" t="4248" r="9106" b="4402">in</wd>

<space/>

<wd l="9168" t="4248" r="9634" b="4406">order</wd>

<space/>

<wd l="9686" t="4267" r="9854" b="4406">to</wd>

<space/>

<wd l="9917" t="4248" r="10488" b="4406">enable</wd>

<space/>

</ln>

<ln l="6125" t="4498" r="10493" b="4699" baseLine="4651" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6125" t="4498" r="6394" b="4656">the</wd>

<space/>

<wd l="6456" t="4498" r="7037" b="4699">speller</wd>

<space/>

<wd l="7080" t="4517" r="7248" b="4656">to</wd>

<space/>

<wd l="7310" t="4498" r="8222" b="4699">eventually</wd>

<space/>

<wd l="8275" t="4517" r="8885" b="4656">correct</wd>

<space/>

<wd l="8933" t="4498" r="9202" b="4656">the</wd>

<space/>

<wd l="9254" t="4546" r="9706" b="4656">more</wd>

<space/>

<wd l="9763" t="4498" r="10493" b="4699">frequent</wd>

<space/>

</ln>

<ln l="6130" t="4762" r="6485" b="4910" baseLine="4906" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6130" t="4800" r="6485" b="4910">one.</wd>

</ln>

</para>

<para l="6125" t="5006" r="10512" b="6470" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="6355" t="5006" r="10488" b="5208" baseLine="5155" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="5006" r="6696" b="5165">The</wd>

<space/>

<wd l="6778" t="5006" r="7670" b="5208">remaining</wd>

<space/>

<wd l="7742" t="5006" r="8174" b="5208">pairs</wd>

<space/>

<wd l="8266" t="5054" r="8525" b="5165">are</wd>

<space/>

<wd l="8606" t="5026" r="8890" b="5165">not</wd>

<space/>

<wd l="8976" t="5006" r="9845" b="5165">addressed</wd>

<space/>

<wd l="9917" t="5006" r="10142" b="5208">by</wd>

<space/>

<wd l="10219" t="5006" r="10488" b="5165">the</wd>

<space/>

</ln>

<ln l="6125" t="5261" r="10474" b="5462" baseLine="5410" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="5261" r="6466" b="5419">tool</wd>

<space/>

<wd l="6557" t="5261" r="6994" b="5419">since</wd>

<space/>

<wd l="7075" t="5261" r="7339" b="5419">the</wd>

<space/>

<wd l="7421" t="5261" r="8299" b="5462">frequency</wd>

<space/>

<wd l="8376" t="5261" r="8578" b="5419">of</wd>

<space/>

<wd l="8635" t="5261" r="8904" b="5419">the</wd>

<space/>

<wd l="8981" t="5261" r="9518" b="5419">words</wd>

<space/>

<wd l="9600" t="5261" r="9739" b="5419">is</wd>

<space/>

<wd l="9830" t="5261" r="10474" b="5419">similar.</wd>

<space/>

</ln>

<ln l="6130" t="5510" r="10512" b="5712" baseLine="5664" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="5510" r="6466" b="5669">The</wd>

<space/>

<wd l="6557" t="5530" r="6984" b="5669">most</wd>

<space/>

<wd l="7080" t="5510" r="7690" b="5669">serious</wd>

<space/>

<wd l="7781" t="5510" r="8520" b="5712">problem</wd>

<space/>

<wd l="8606" t="5510" r="8746" b="5669">is</wd>

<space/>

<wd l="8842" t="5510" r="9442" b="5669">related</wd>

<space/>

<wd l="9528" t="5530" r="9691" b="5669">to</wd>

<space/>

<wd l="9787" t="5510" r="10214" b="5712">pairs</wd>

<space/>

<wd l="10310" t="5510" r="10512" b="5669">of</wd>

<space/>

</ln>

<ln l="6130" t="5765" r="10488" b="5966" baseLine="5914">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6130" t="5765" r="6859" b="5966">frequent</wd>

<space/>

<wd l="6902" t="5765" r="7483" b="5957">words,</wd>

<space/>

<wd l="7536" t="5765" r="7858" b="5923">like</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7910" t="5765" r="8198" b="5923">“e”</wd>

<space/>

<wd l="8246" t="5765" r="8822" b="5966">(=and)</wd>

<space/>

<wd l="8875" t="5765" r="9197" b="5923">and</wd>

<space/>

<wd l="9235" t="5765" r="9523" b="5923">“é”</wd>

<space/>

<wd l="9571" t="5765" r="10032" b="5966">(=is);</wd>

<space/>

<wd l="10094" t="5765" r="10488" b="5923">“da”</wd>

<space/>

</run>

</ln>

<ln l="6130" t="6019" r="10483" b="6221" baseLine="6168" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="6019" r="6528" b="6221">(=of</wd>

<space/>

<wd l="6629" t="6019" r="6965" b="6221">the)</wd>

<space/>

<wd l="7099" t="6019" r="7421" b="6178">and</wd>

<space/>

<wd l="7546" t="6019" r="7944" b="6178">“dá”</wd>

<space/>

<wd l="8074" t="6019" r="8563" b="6221">(third</wd>

<space/>

<wd l="8678" t="6067" r="9269" b="6221">person</wd>

<space/>

<wd l="9398" t="6019" r="9600" b="6178">of</wd>

<space/>

<wd l="9701" t="6019" r="9970" b="6178">the</wd>

<space/>

<wd l="10099" t="6019" r="10483" b="6178">verb</wd>

<space/>

</ln>

<ln l="6130" t="6269" r="7445" b="6470" baseLine="6422" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="6269" r="6893" b="6427">“dar”=to</wd>

<space/>

<wd l="6955" t="6269" r="7445" b="6470">give).</wd>

</ln>

</para>

<para l="6120" t="6523" r="10507" b="9000" alignment="justified" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="6350" t="6523" r="10488" b="6725" baseLine="6677" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6350" t="6523" r="7075" b="6682">Another</wd>

<space/>

<wd l="7147" t="6523" r="8270" b="6682">modification</wd>

<space/>

<wd l="8347" t="6571" r="8683" b="6682">was</wd>

<space/>

<wd l="8760" t="6523" r="9235" b="6682">made</wd>

<space/>

<wd l="9317" t="6523" r="9485" b="6677">in</wd>

<space/>

<wd l="9557" t="6523" r="9826" b="6682">the</wd>

<space/>

<wd l="9912" t="6523" r="10488" b="6725">speller</wd>

<space/>

</ln>

<ln l="6125" t="6778" r="10493" b="6979" baseLine="6926" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6125" t="6797" r="6293" b="6936">to</wd>

<space/>

<wd l="6341" t="6797" r="7003" b="6979">prevent</wd>

<space/>

<wd l="7051" t="6778" r="7320" b="6936">the</wd>

<space/>

<wd l="7373" t="6778" r="8256" b="6936">correction</wd>

<space/>

<wd l="8309" t="6778" r="8510" b="6936">of</wd>

<space/>

<wd l="8539" t="6826" r="9384" b="6979">acronyms</wd>

<space/>

<wd l="9442" t="6778" r="9758" b="6936">and</wd>

<space/>

<wd l="9811" t="6782" r="10493" b="6936">Internet</wd>

<space/>

</ln>

<ln l="6134" t="7027" r="10488" b="7229" baseLine="7181" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6134" t="7027" r="6634" b="7229">slang.</wd>

<space/>

<wd l="6706" t="7027" r="7382" b="7229">Foreign</wd>

<space/>

<wd l="7450" t="7027" r="7819" b="7186">loan</wd>

<space/>

<wd l="7882" t="7027" r="8414" b="7186">words</wd>

<space/>

<wd l="8482" t="7027" r="8798" b="7186">and</wd>

<space/>

<wd l="8851" t="7075" r="9432" b="7229">proper</wd>

<space/>

<wd l="9490" t="7075" r="10013" b="7186">nouns</wd>

<space/>

<wd l="10075" t="7027" r="10488" b="7186">have</wd>

<space/>

</ln>

<ln l="6120" t="7282" r="10488" b="7483" baseLine="7435" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6120" t="7282" r="6542" b="7440">been</wd>

<space/>

<wd l="6600" t="7282" r="7709" b="7483">incorporated</wd>

<space/>

<wd l="7762" t="7301" r="7930" b="7440">to</wd>

<space/>

<wd l="7987" t="7282" r="8256" b="7440">the</wd>

<space/>

<wd l="8314" t="7282" r="9010" b="7474">lexicon,</wd>

<space/>

<wd l="9072" t="7282" r="9610" b="7440">which</wd>

<space/>

<wd l="9667" t="7282" r="9802" b="7440">is</wd>

<space/>

<wd l="9864" t="7282" r="10272" b="7440">used</wd>

<space/>

<wd l="10320" t="7301" r="10488" b="7440">to</wd>

<space/>

</ln>

<ln l="6130" t="7536" r="10483" b="7738" baseLine="7685" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6130" t="7536" r="6811" b="7738">identify</wd>

<space/>

<wd l="6998" t="7536" r="7949" b="7738">misspelled</wd>

<space/>

<wd l="8136" t="7536" r="8669" b="7694">words</wd>

<space/>

<wd l="8870" t="7536" r="9192" b="7694">and</wd>

<space/>

<wd l="9379" t="7555" r="9547" b="7694">to</wd>

<space/>

<wd l="9749" t="7555" r="10483" b="7738">generate</wd>

<space/>

</ln>

<ln l="6130" t="7790" r="10488" b="7992" baseLine="7939" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6130" t="7790" r="7046" b="7949">candidates</wd>

<space/>

<wd l="7262" t="7790" r="7517" b="7949">for</wd>

<space/>

<wd l="7723" t="7790" r="8736" b="7992">misspelling</wd>

<space/>

<wd l="8942" t="7790" r="9878" b="7949">correction.</wd>

<space/>

<wd l="10104" t="7790" r="10488" b="7949">This</wd>

<space/>

</ln>

<ln l="6130" t="8040" r="10507" b="8242" baseLine="8194" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6130" t="8040" r="6859" b="8198">decision</wd>

<space/>

<wd l="6922" t="8088" r="7262" b="8198">was</wd>

<space/>

<wd l="7334" t="8040" r="8218" b="8198">motivated</wd>

<space/>

<wd l="8275" t="8040" r="8501" b="8242">by</wd>

<space/>

<wd l="8563" t="8040" r="8832" b="8198">the</wd>

<space/>

<wd l="8904" t="8040" r="9293" b="8242">high</wd>

<space/>

<wd l="9360" t="8040" r="10238" b="8242">frequency</wd>

<space/>

<wd l="10306" t="8040" r="10507" b="8198">of</wd>

<space/>

</ln>

<ln l="6125" t="8294" r="10507" b="8496" baseLine="8443" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6125" t="8294" r="7070" b="8496">misspelled</wd>

<space/>

<wd l="7162" t="8294" r="8141" b="8496">technology</wd>

<space/>

<wd l="8218" t="8294" r="8803" b="8496">jargon</wd>

<space/>

<wd l="8909" t="8294" r="9077" b="8448">in</wd>

<space/>

<wd l="9178" t="8294" r="9442" b="8453">the</wd>

<space/>

<wd l="9552" t="8294" r="10205" b="8453">domain</wd>

<space/>

<wd l="10310" t="8294" r="10507" b="8453">of</wd>

<space/>

</ln>

<ln l="6120" t="8549" r="10483" b="8750" baseLine="8698" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6120" t="8549" r="6797" b="8750">product</wd>

<space/>

<wd l="6850" t="8549" r="7531" b="8707">reviews</wd>

<space/>

<wd l="7594" t="8549" r="7910" b="8750">(eg.</wd>

<space/>

<wd l="7982" t="8549" r="8741" b="8750">“desing”</wd>

<space/>

<wd l="8803" t="8549" r="9432" b="8707">instead</wd>

<space/>

<wd l="9485" t="8549" r="9686" b="8707">of</wd>

<space/>

<wd l="9720" t="8549" r="10483" b="8750">“design”</wd>

<space/>

</ln>

<ln l="6130" t="8798" r="9504" b="9000" baseLine="8952" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6130" t="8798" r="6446" b="8957">and</wd>

<space/>

<wd l="6504" t="8798" r="7354" b="8957">“Blutoth”</wd>

<space/>

<wd l="7411" t="8798" r="8035" b="8957">instead</wd>

<space/>

<wd l="8088" t="8798" r="8290" b="8957">of</wd>

<space/>

<wd l="8328" t="8798" r="9504" b="9000">“Bluetooth”).</wd>

</ln>

</para>

<para l="6125" t="9053" r="10512" b="10771" alignment="justified" spaceBefore="1" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="6355" t="9053" r="10493" b="9254" baseLine="9206" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="9053" r="6696" b="9211">The</wd>

<space/>

<wd l="6811" t="9053" r="7387" b="9211">lexical</wd>

<space/>

<wd l="7502" t="9101" r="8381" b="9245">resources,</wd>

<space/>

<wd l="8506" t="9053" r="9139" b="9211">created</wd>

<space/>

<wd l="9250" t="9053" r="10123" b="9254">especially</wd>

<space/>

<wd l="10238" t="9053" r="10493" b="9211">for</wd>

<space/>

</ln>

<ln l="6125" t="9307" r="10488" b="9509" baseLine="9456" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="9307" r="6490" b="9499">this,</wd>

<space/>

<wd l="6557" t="9307" r="7402" b="9509">comprise:</wd>

<space/>

<wd l="7483" t="9312" r="8160" b="9466">Internet</wd>

<space/>

<wd l="8227" t="9307" r="8678" b="9509">slang</wd>

<space/>

<wd l="8736" t="9307" r="9134" b="9509">(420</wd>

<space/>

<wd l="9197" t="9307" r="9792" b="9509">items),</wd>

<space/>

<wd l="9859" t="9307" r="10488" b="9509">foreign</wd>

<space/>

</ln>

<ln l="6130" t="9557" r="10488" b="9758" baseLine="9710" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="9557" r="6499" b="9715">loan</wd>

<space/>

<wd l="6614" t="9557" r="7147" b="9715">words</wd>

<space/>

<wd l="7262" t="9557" r="7656" b="9758">(248</wd>

<space/>

<wd l="7776" t="9557" r="8376" b="9758">items),</wd>

<space/>

<wd l="8486" t="9605" r="9067" b="9758">proper</wd>

<space/>

<wd l="9178" t="9605" r="9696" b="9715">nouns</wd>

<space/>

<wd l="9816" t="9557" r="10488" b="9758">(20,730</wd>

<space/>

</ln>

<ln l="6130" t="9811" r="10512" b="10013" baseLine="9965" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="9811" r="6725" b="10013">items),</wd>

<space/>

<wd l="6811" t="9811" r="7133" b="9970">and</wd>

<space/>

<wd l="7205" t="9859" r="8050" b="10013">acronyms</wd>

<space/>

<wd l="8136" t="9811" r="8530" b="10013">(156</wd>

<space/>

<wd l="8616" t="9811" r="9206" b="10013">items).</wd>

<space/>

<wd l="9298" t="9811" r="9816" b="9970">These</wd>

<space/>

<wd l="9907" t="9830" r="10224" b="9970">sets</wd>

<space/>

<wd l="10310" t="9811" r="10512" b="9970">of</wd>

<space/>

</ln>

<ln l="6130" t="10066" r="10483" b="10267" baseLine="10214" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="10066" r="6595" b="10224">items</wd>

<space/>

<wd l="6653" t="10114" r="7080" b="10224">were</wd>

<space/>

<wd l="7133" t="10066" r="7872" b="10267">partially</wd>

<space/>

<wd l="7930" t="10066" r="8750" b="10267">compiled</wd>

<space/>

<wd l="8794" t="10066" r="9019" b="10267">by</wd>

<space/>

<wd l="9072" t="10070" r="9955" b="10224">Hartmann</wd>

<space/>

<wd l="10013" t="10085" r="10166" b="10224">et</wd>

<space/>

<wd l="10229" t="10066" r="10483" b="10258">al.,</wd>

<space/>

</ln>

<ln l="6130" t="10320" r="10488" b="10522" baseLine="10469" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="10320" r="6706" b="10522">(2014)</wd>

<space/>

<wd l="6859" t="10320" r="7181" b="10478">and</wd>

<space/>

<wd l="7325" t="10320" r="7920" b="10478">further</wd>

<space/>

<wd l="8064" t="10320" r="9360" b="10522">complemented</wd>

<space/>

<wd l="9504" t="10320" r="10080" b="10522">during</wd>

<space/>

<wd l="10219" t="10320" r="10488" b="10478">the</wd>

<space/>

</ln>

<ln l="6130" t="10570" r="8078" b="10771" baseLine="10723" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="10570" r="6830" b="10771">analysis</wd>

<space/>

<wd l="6893" t="10570" r="7094" b="10728">of</wd>

<space/>

<wd l="7123" t="10570" r="7392" b="10728">the</wd>

<space/>

<wd l="7454" t="10618" r="8078" b="10771">corpus.</wd>

</ln>

</para>

<para l="6125" t="10824" r="10512" b="14568" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="6355" t="10824" r="10488" b="11026" baseLine="10978" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6355" t="10824" r="6696" b="10982">The</wd>

<space/>

<wd l="6787" t="10824" r="7445" b="10982">module</wd>

<space/>

<wd l="7531" t="10824" r="8870" b="11026">Acronym_Map</wd>

<space/>

<wd l="8971" t="10843" r="9293" b="10982">sets</wd>

<space/>

<wd l="9389" t="10824" r="9600" b="10982">all</wd>

<space/>

<wd l="9701" t="10824" r="10229" b="10982">letters</wd>

<space/>

<wd l="10320" t="10843" r="10488" b="10982">to</wd>

<space/>

</ln>

<ln l="6125" t="11078" r="10488" b="11280" baseLine="11227" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="11126" r="7003" b="11280">uppercase</wd>

<space/>

<wd l="7099" t="11078" r="7958" b="11237">whenever</wd>

<space/>

<wd l="8054" t="11078" r="8174" b="11237">it</wd>

<space/>

<wd l="8275" t="11078" r="8875" b="11237">detects</wd>

<space/>

<wd l="8981" t="11126" r="9178" b="11237">an</wd>

<space/>

<wd l="9283" t="11126" r="10051" b="11280">acronym</wd>

<space/>

<wd l="10147" t="11078" r="10488" b="11280">(the</wd>

<space/>

</ln>

<ln l="6130" t="11328" r="10478" b="11530" baseLine="11482" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="11328" r="6931" b="11486">detection</wd>

<space/>

<wd l="7022" t="11328" r="7224" b="11486">of</wd>

<space/>

<wd l="7291" t="11376" r="8131" b="11530">acronyms</wd>

<space/>

<wd l="8227" t="11328" r="8366" b="11486">is</wd>

<space/>

<wd l="8453" t="11328" r="8966" b="11486">based</wd>

<space/>

<wd l="9058" t="11376" r="9274" b="11486">on</wd>

<space/>

<wd l="9355" t="11328" r="9624" b="11486">the</wd>

<space/>

<wd l="9720" t="11328" r="10478" b="11530">lexicon).</wd>

<space/>

</ln>

<ln l="6130" t="11582" r="10493" b="11784" baseLine="11736" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="11582" r="6466" b="11741">The</wd>

<space/>

<wd l="6514" t="11582" r="7171" b="11741">module</wd>

<space/>

<wd l="7229" t="11582" r="8222" b="11784">Slang_Map</wd>

<space/>

<wd l="8285" t="11582" r="9202" b="11741">substitutes</wd>

<space/>

<wd l="9259" t="11630" r="9710" b="11741">some</wd>

<space/>

<wd l="9763" t="11582" r="10493" b="11784">frequent</wd>

<space/>

</ln>

<ln l="6134" t="11837" r="10493" b="12038" baseLine="11986" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6134" t="11837" r="6590" b="12038">slang</wd>

<space/>

<wd l="6739" t="11837" r="7277" b="11995">words</wd>

<space/>

<wd l="7426" t="11837" r="7651" b="12038">by</wd>

<space/>

<wd l="7800" t="11837" r="8208" b="11995">their</wd>

<space/>

<wd l="8362" t="11837" r="9274" b="12038">equivalent</wd>

<space/>

<wd l="9427" t="11837" r="9595" b="11990">in</wd>

<space/>

<wd l="9754" t="11837" r="10493" b="11995">standard</wd>

<space/>

</ln>

<ln l="6130" t="12091" r="10488" b="12293" baseLine="12240" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="12091" r="6917" b="12293">language</wd>

<space/>

<wd l="7018" t="12091" r="7339" b="12250">and</wd>

<space/>

<wd l="7430" t="12091" r="8390" b="12250">normalizes</wd>

<space/>

<wd l="8496" t="12091" r="8880" b="12293">long</wd>

<space/>

<wd l="8976" t="12091" r="9595" b="12250">vowels</wd>

<space/>

<wd l="9691" t="12091" r="9917" b="12293">by</wd>

<space/>

<wd l="10008" t="12091" r="10488" b="12293">using</wd>

<space/>

</ln>

<ln l="6125" t="12341" r="10512" b="12542" baseLine="12494" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="12341" r="6749" b="12542">regular</wd>

<space/>

<wd l="6902" t="12341" r="7968" b="12542">expressions.</wd>

<space/>

<wd l="8131" t="12341" r="8640" b="12499">There</wd>

<space/>

<wd l="8798" t="12389" r="9062" b="12499">are</wd>

<space/>

<wd l="9211" t="12360" r="9538" b="12499">two</wd>

<space/>

<wd l="9691" t="12360" r="10152" b="12542">types</wd>

<space/>

<wd l="10310" t="12341" r="10512" b="12499">of</wd>

<space/>

</ln>

<ln l="6130" t="12595" r="10493" b="12797" baseLine="12744" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="12600" r="6806" b="12754">Internet</wd>

<space/>

<wd l="6869" t="12595" r="7450" b="12797">slangs:</wd>

<space/>

<wd l="7546" t="12595" r="7699" b="12797">1)</wd>

<space/>

<wd l="7757" t="12595" r="8222" b="12754">those</wd>

<space/>

<wd l="8275" t="12595" r="8606" b="12754">that</wd>

<space/>

<wd l="8664" t="12643" r="8957" b="12754">can</wd>

<space/>

<wd l="9010" t="12595" r="9221" b="12754">be</wd>

<space/>

<wd l="9278" t="12595" r="10123" b="12754">identified</wd>

<space/>

<wd l="10176" t="12595" r="10339" b="12749">in</wd>

<space/>

<wd l="10397" t="12643" r="10493" b="12754">a</wd>

<space/>

</ln>

<ln l="6130" t="12850" r="10488" b="13051" baseLine="12998">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="6130" t="12850" r="7291" b="13008">lexical-based</wd>

<space/>

<wd l="7382" t="12850" r="8184" b="13051">approach</wd>

<space/>

<wd l="8285" t="12850" r="8602" b="13051">(eg.</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="8712" t="12850" r="9893" b="13042">“vc”=“você”;</wd>

<space/>

<wd l="10003" t="12850" r="10488" b="13008">“tb”=</wd>

<space/>

</run>

</ln>

<ln l="6130" t="13099" r="10488" b="13301" baseLine="13253">

<wd l="6130" t="13099" r="7147" b="13301"><run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">“também”)</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="7219" t="13099" r="7536" b="13258">and</wd>

<space/>

<wd l="7594" t="13099" r="7766" b="13301">2)</wd>

<space/>

<wd l="7829" t="13099" r="8294" b="13258">those</wd>

<space/>

<wd l="8352" t="13099" r="8683" b="13258">that</wd>

<space/>

<wd l="8741" t="13099" r="9154" b="13258">have</wd>

<space/>

<wd l="9221" t="13147" r="9317" b="13258">a</wd>

<space/>

<wd l="9370" t="13099" r="10262" b="13301">homonym</wd>

<space/>

<wd l="10320" t="13099" r="10488" b="13253">in</wd>

<space/>

</run>

</ln>

<ln l="6125" t="13354" r="10488" b="13555" baseLine="13507">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="6125" t="13354" r="6394" b="13512">the</wd>

<space/>

<wd l="6528" t="13354" r="7267" b="13512">standard</wd>

<space/>

<wd l="7392" t="13354" r="8232" b="13555">language,</wd>

<space/>

<wd l="8366" t="13402" r="8544" b="13512">as</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="8674" t="13354" r="9197" b="13512">“fala”</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="9326" t="13354" r="9494" b="13507">in</wd>

<space/>

</run>

<wd l="9624" t="13354" r="9931" b="13512"><run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">“</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">vo</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

</run>

<wd l="10066" t="13354" r="10488" b="13512"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">fala</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">”</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

</run>

</ln>

<ln l="6130" t="13608" r="10488" b="13810" baseLine="13757">

<wd l="6130" t="13608" r="6749" b="13810"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">(</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">=“vou</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

</run>

<wd l="6840" t="13608" r="7536" b="13766"><run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">falar”</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">=I</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="7618" t="13608" r="7958" b="13766">will</wd>

<space/>

<wd l="8059" t="13608" r="8616" b="13810">speak)</wd>

<space/>

<wd l="8717" t="13608" r="9034" b="13766">and</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="9125" t="13608" r="9643" b="13766">“fala”</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="9734" t="13608" r="10488" b="13810">(=he/she</wd>

<space/>

</run>

</ln>

<ln l="6134" t="13858" r="10488" b="14059" baseLine="14011" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6134" t="13858" r="6758" b="14059">speaks;</wd>

<space/>

<wd l="6907" t="13858" r="7618" b="14059">speech).</wd>

<space/>

<wd l="7757" t="13862" r="8179" b="14016">Here</wd>

<space/>

<wd l="8309" t="13906" r="8563" b="14016">we</wd>

<space/>

<wd l="8698" t="13858" r="9058" b="14016">deal</wd>

<space/>

<wd l="9187" t="13858" r="9576" b="14059">only</wd>

<space/>

<wd l="9701" t="13858" r="10094" b="14016">with</wd>

<space/>

<wd l="10219" t="13858" r="10488" b="14016">the</wd>

<space/>

</ln>

<ln l="6130" t="14112" r="10488" b="14304" baseLine="14266" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="14112" r="7018" b="14270">correction</wd>

<space/>

<wd l="7114" t="14112" r="7315" b="14270">of</wd>

<space/>

<wd l="7387" t="14112" r="7656" b="14270">the</wd>

<space/>

<wd l="7757" t="14112" r="8107" b="14270">first</wd>

<space/>

<wd l="8203" t="14112" r="8640" b="14304">kind,</wd>

<space/>

<wd l="8750" t="14160" r="8923" b="14270">as</wd>

<space/>

<wd l="9024" t="14112" r="9288" b="14270">the</wd>

<space/>

<wd l="9398" t="14112" r="10003" b="14270">second</wd>

<space/>

<wd l="10094" t="14112" r="10488" b="14270">kind</wd>

<space/>

</ln>

<ln l="6125" t="14366" r="10498" b="14568" baseLine="14515" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="14366" r="6830" b="14568">requires</wd>

<space/>

<wd l="6922" t="14386" r="7565" b="14525">context</wd>

<space/>

<wd l="7646" t="14366" r="8611" b="14568">knowledge</wd>

<space/>

<wd l="8698" t="14386" r="8866" b="14525">to</wd>

<space/>

<wd l="8952" t="14366" r="9163" b="14525">be</wd>

<space/>

<wd l="9250" t="14366" r="10094" b="14525">identified</wd>

<space/>

<wd l="10181" t="14366" r="10498" b="14525">and</wd>

</ln>

</para>

<para l="6125" t="15024" r="10205" b="15216" alignment="left" spaceBefore="437" spaceAfter="36" lsp="exactly" lspExact="232" language="en">

<ln l="6125" t="15024" r="10205" b="15216" baseLine="15165">

<run underlined="none" subsuperscript="none" fontSize="600" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6125" t="15029" r="6182" b="15110">4</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6230" t="15043" r="10205" b="15216">http://www.archives.gov/research/census/soundex.html</wd>

</run>

</ln>

</para>

</column>

</section>

<dd l="1402" t="15736" r="10524" b="15977">

<para l="5800" t="15792" r="6148" b="15941" alignment="centered" spaceBefore="4" lsp="exactly" lspExact="229" language="en">

<ln l="5866" t="15792" r="6082" b="15941" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="43">

<wd l="5866" t="15792" r="6082" b="15941">42</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4305.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1408" marginTop="1417" marginRight="1379" marginBottom="1302" offsetX="-8" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1408" t="1417" r="10530" b="15427">

<column l="1408" t="1417" r="5819" b="15427">

<para l="1421" t="1464" r="5789" b="2170" alignment="justified" spaceBefore="3" lsp="exactly" lspExact="252" language="en">

<ln l="1426" t="1464" r="5784" b="1622" baseLine="1613" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="1464" r="2280" b="1622">corrected.</wd>

<space/>

<wd l="2448" t="1464" r="2726" b="1618">All</wd>

<space/>

<wd l="2885" t="1464" r="3336" b="1622">these</wd>

<space/>

<wd l="3494" t="1464" r="4238" b="1622">modules</wd>

<space/>

<wd l="4397" t="1512" r="4690" b="1622">use</wd>

<space/>

<wd l="4843" t="1464" r="5251" b="1622">their</wd>

<space/>

<wd l="5410" t="1512" r="5784" b="1622">own</wd>

<space/>

</ln>

<ln l="1426" t="1718" r="5789" b="1920" baseLine="1867" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="1718" r="2150" b="1877">lexicons</wd>

<space/>

<wd l="2208" t="1766" r="2381" b="1877">as</wd>

<space/>

<wd l="2438" t="1718" r="2813" b="1877">well</wd>

<space/>

<wd l="2870" t="1766" r="3043" b="1877">as</wd>

<space/>

<wd l="3106" t="1766" r="3202" b="1877">a</wd>

<space/>

<wd l="3259" t="1738" r="3494" b="1877">set</wd>

<space/>

<wd l="3547" t="1718" r="3749" b="1877">of</wd>

<space/>

<wd l="3778" t="1718" r="4402" b="1920">regular</wd>

<space/>

<wd l="4454" t="1718" r="5472" b="1920">expressions</wd>

<space/>

<wd l="5530" t="1718" r="5789" b="1877">for</wd>

<space/>

</ln>

<ln l="1421" t="1968" r="3355" b="2170" baseLine="2122" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="1968" r="2458" b="2170">recognizing</wd>

<space/>

<wd l="2510" t="1968" r="2779" b="2126">the</wd>

<space/>

<wd l="2842" t="1968" r="3355" b="2126">items.</wd>

</ln>

</para>

<para l="1416" t="2222" r="5794" b="7738" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="1651" t="2222" r="5789" b="2424" baseLine="2376" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="2222" r="1987" b="2381">The</wd>

<space/>

<wd l="2117" t="2222" r="2419" b="2381">last</wd>

<space/>

<wd l="2544" t="2222" r="3250" b="2414">module,</wd>

<space/>

<wd l="3379" t="2227" r="5054" b="2424">ProperName_Map,</wd>

<space/>

<wd l="5189" t="2270" r="5563" b="2381">uses</wd>

<space/>

<wd l="5693" t="2270" r="5789" b="2381">a</wd>

<space/>

</ln>

<ln l="1426" t="2477" r="5770" b="2669" baseLine="2626" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="2477" r="2069" b="2635">lexicon</wd>

<space/>

<wd l="2122" t="2477" r="2323" b="2635">of</wd>

<space/>

<wd l="2347" t="2477" r="2938" b="2635">named</wd>

<space/>

<wd l="2990" t="2477" r="3667" b="2669">entities,</wd>

<space/>

<wd l="3720" t="2477" r="4262" b="2635">which</wd>

<space/>

<wd l="4315" t="2477" r="5002" b="2635">consists</wd>

<space/>

<wd l="5059" t="2477" r="5261" b="2635">of</wd>

<space/>

<wd l="5299" t="2477" r="5770" b="2669">8,465</wd>

<space/>

</ln>

<ln l="1416" t="2726" r="5784" b="2928" baseLine="2880" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="2774" r="1997" b="2928">proper</wd>

<space/>

<wd l="2069" t="2774" r="2592" b="2885">nouns</wd>

<space/>

<wd l="2678" t="2726" r="3106" b="2885">from</wd>

<space/>

<wd l="3178" t="2726" r="3446" b="2885">the</wd>

<space/>

<wd l="3518" t="2726" r="4032" b="2885">NILC</wd>

<space/>

<wd l="4114" t="2726" r="4838" b="2885">Lexicon</wd>

<space/>

<wd l="4920" t="2726" r="5544" b="2928">(Nunes</wd>

<space/>

<wd l="5630" t="2746" r="5784" b="2885">et</wd>

<space/>

</ln>

<ln l="1426" t="2981" r="5770" b="3182" baseLine="3134" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="2981" r="1680" b="3173">al.,</wd>

<space/>

<wd l="1790" t="2981" r="2323" b="3182">1996).</wd>

<space/>

<wd l="2414" t="2986" r="2717" b="3139">We</wd>

<space/>

<wd l="2798" t="2981" r="3206" b="3139">have</wd>

<space/>

<wd l="3293" t="2981" r="3638" b="3139">also</wd>

<space/>

<wd l="3730" t="2981" r="4253" b="3139">added</wd>

<space/>

<wd l="4334" t="3029" r="4430" b="3139">a</wd>

<space/>

<wd l="4507" t="2981" r="5107" b="3139">further</wd>

<space/>

<wd l="5203" t="2981" r="5770" b="3173">12,265</wd>

<space/>

</ln>

<ln l="1416" t="3235" r="5779" b="3437" baseLine="3384" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="3283" r="1997" b="3437">proper</wd>

<space/>

<wd l="2174" t="3283" r="2746" b="3427">nouns,</wd>

<space/>

<wd l="2938" t="3235" r="3826" b="3437">consisting</wd>

<space/>

<wd l="4008" t="3235" r="4210" b="3394">of</wd>

<space/>

<wd l="4363" t="3235" r="5045" b="3437">product</wd>

<space/>

<wd l="5222" t="3283" r="5779" b="3394">names</wd>

<space/>

</ln>

<ln l="1426" t="3490" r="5784" b="3691" baseLine="3638" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="3490" r="2251" b="3691">including</wd>

<space/>

<wd l="2438" t="3490" r="3029" b="3648">brands</wd>

<space/>

<wd l="3235" t="3490" r="3552" b="3648">and</wd>

<space/>

<wd l="3744" t="3490" r="4426" b="3648">models.</wd>

<space/>

<wd l="4637" t="3490" r="5160" b="3648">These</wd>

<space/>

<wd l="5362" t="3538" r="5784" b="3648">were</wd>

<space/>

</ln>

<ln l="1426" t="3739" r="5779" b="3898" baseLine="3893" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="3739" r="2232" b="3898">extracted</wd>

<space/>

<wd l="2366" t="3739" r="2794" b="3898">from</wd>

<space/>

<wd l="2928" t="3739" r="3197" b="3898">the</wd>

<space/>

<wd l="3341" t="3739" r="4138" b="3898">metadata</wd>

<space/>

<wd l="4277" t="3739" r="5064" b="3898">available</wd>

<space/>

<wd l="5208" t="3739" r="5376" b="3893">in</wd>

<space/>

<wd l="5510" t="3739" r="5779" b="3898">the</wd>

<space/>

</ln>

<ln l="1421" t="3994" r="5794" b="4195" baseLine="4142" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="3994" r="2165" b="4195">Buscapé</wd>

<space/>

<wd l="2218" t="4042" r="2846" b="4195">corpus,</wd>

<space/>

<wd l="2909" t="3994" r="3226" b="4152">and</wd>

<space/>

<wd l="3269" t="3994" r="3538" b="4152">the</wd>

<space/>

<wd l="3590" t="3994" r="4310" b="4152">addition</wd>

<space/>

<wd l="4363" t="3994" r="4560" b="4152">of</wd>

<space/>

<wd l="4589" t="3994" r="5040" b="4152">these</wd>

<space/>

<wd l="5088" t="3994" r="5794" b="4152">resulted</wd>

<space/>

</ln>

<ln l="1426" t="4248" r="5784" b="4450" baseLine="4397" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="4248" r="1589" b="4402">in</wd>

<space/>

<wd l="1680" t="4248" r="2275" b="4440">20,730</wd>

<space/>

<wd l="2371" t="4248" r="2947" b="4406">lexical</wd>

<space/>

<wd l="3043" t="4248" r="3557" b="4406">items.</wd>

<space/>

<wd l="3658" t="4248" r="4181" b="4406">When</wd>

<space/>

<wd l="4272" t="4296" r="4368" b="4406">a</wd>

<space/>

<wd l="4445" t="4296" r="5030" b="4450">proper</wd>

<space/>

<wd l="5112" t="4296" r="5554" b="4406">noun</wd>

<space/>

<wd l="5645" t="4248" r="5784" b="4406">is</wd>

<space/>

</ln>

<ln l="1421" t="4498" r="5779" b="4699" baseLine="4651" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="4498" r="2434" b="4699">recognized,</wd>

<space/>

<wd l="2520" t="4498" r="2832" b="4656">this</wd>

<space/>

<wd l="2918" t="4498" r="3576" b="4656">module</wd>

<space/>

<wd l="3662" t="4498" r="4584" b="4699">capitalizes</wd>

<space/>

<wd l="4670" t="4498" r="4834" b="4656">it.</wd>

<space/>

<wd l="4925" t="4502" r="5779" b="4690">However,</wd>

<space/>

</ln>

<ln l="1426" t="4752" r="5779" b="4954" baseLine="4906" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="4752" r="2227" b="4910">detection</wd>

<space/>

<wd l="2294" t="4752" r="2496" b="4910">of</wd>

<space/>

<wd l="2534" t="4800" r="3115" b="4954">proper</wd>

<space/>

<wd l="3178" t="4800" r="3701" b="4910">nouns</wd>

<space/>

<wd l="3768" t="4752" r="4392" b="4910">written</wd>

<space/>

<wd l="4459" t="4752" r="4627" b="4906">in</wd>

<space/>

<wd l="4699" t="4752" r="5573" b="4910">lowercase</wd>

<space/>

<wd l="5640" t="4752" r="5779" b="4910">is</wd>

<space/>

</ln>

<ln l="1426" t="5006" r="5784" b="5208" baseLine="5155" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="5006" r="1666" b="5165">far</wd>

<space/>

<wd l="1786" t="5006" r="2213" b="5165">from</wd>

<space/>

<wd l="2333" t="5054" r="2429" b="5165">a</wd>

<space/>

<wd l="2558" t="5006" r="3134" b="5208">simple</wd>

<space/>

<wd l="3254" t="5006" r="3658" b="5198">task,</wd>

<space/>

<wd l="3778" t="5006" r="4478" b="5165">because</wd>

<space/>

<wd l="4603" t="5054" r="5088" b="5208">many</wd>

<space/>

<wd l="5203" t="5054" r="5784" b="5208">proper</wd>

<space/>

</ln>

<ln l="1421" t="5261" r="5779" b="5462" baseLine="5410" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="5309" r="1944" b="5419">nouns</wd>

<space/>

<wd l="2045" t="5309" r="2309" b="5419">are</wd>

<space/>

<wd l="2410" t="5261" r="2755" b="5419">also</wd>

<space/>

<wd l="2861" t="5309" r="3624" b="5419">common</wd>

<space/>

<wd l="3720" t="5261" r="4258" b="5419">words</wd>

<space/>

<wd l="4363" t="5261" r="4531" b="5414">in</wd>

<space/>

<wd l="4622" t="5261" r="4891" b="5419">the</wd>

<space/>

<wd l="4992" t="5261" r="5779" b="5462">language</wd>

<space/>

</ln>

<ln l="1426" t="5510" r="5784" b="5712" baseLine="5664" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="5510" r="2117" b="5702">lexicon,</wd>

<space/>

<wd l="2246" t="5558" r="2424" b="5669">as</wd>

<space/>

<wd l="2544" t="5510" r="3480" b="5669">mentioned</wd>

<space/>

<wd l="3600" t="5510" r="3768" b="5664">in</wd>

<space/>

<wd l="3893" t="5510" r="4546" b="5669">Section</wd>

<space/>

<wd l="4670" t="5510" r="4819" b="5669">3.</wd>

<space/>

<wd l="4954" t="5510" r="5784" b="5712">Although</wd>

<space/>

</ln>

<ln l="1421" t="5765" r="5774" b="5966" baseLine="5914" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="5765" r="1858" b="5923">there</wd>

<space/>

<wd l="1963" t="5813" r="2222" b="5923">are</wd>

<space/>

<wd l="2333" t="5813" r="2784" b="5923">some</wd>

<space/>

<wd l="2885" t="5765" r="3475" b="5923">named</wd>

<space/>

<wd l="3576" t="5765" r="4075" b="5966">entity</wd>

<space/>

<wd l="4171" t="5765" r="5102" b="5966">recognizer</wd>

<space/>

<wd l="5203" t="5765" r="5774" b="5966">(NER)</wd>

<space/>

</ln>

<ln l="1430" t="6019" r="5779" b="6221" baseLine="6168" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="6038" r="2107" b="6221">systems</wd>

<space/>

<wd l="2179" t="6019" r="2434" b="6178">for</wd>

<space/>

<wd l="2491" t="6024" r="3518" b="6221">Portuguese,</wd>

<space/>

<wd l="3581" t="6019" r="3960" b="6221">they</wd>

<space/>

<wd l="4022" t="6019" r="4234" b="6178">do</wd>

<space/>

<wd l="4296" t="6038" r="4579" b="6178">not</wd>

<space/>

<wd l="4632" t="6019" r="5347" b="6221">perform</wd>

<space/>

<wd l="5400" t="6019" r="5779" b="6178">well</wd>

<space/>

</ln>

<ln l="1426" t="6269" r="5779" b="6470" baseLine="6422" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="6269" r="1680" b="6427">for</wd>

<space/>

<wd l="1858" t="6269" r="2371" b="6461">UGC,</wd>

<space/>

<wd l="2568" t="6269" r="3010" b="6427">since</wd>

<space/>

<wd l="3197" t="6269" r="3576" b="6470">they</wd>

<space/>

<wd l="3754" t="6269" r="4402" b="6470">heavily</wd>

<space/>

<wd l="4584" t="6269" r="4930" b="6470">rely</wd>

<space/>

<wd l="5112" t="6317" r="5328" b="6427">on</wd>

<space/>

<wd l="5510" t="6269" r="5779" b="6427">the</wd>

<space/>

</ln>

<ln l="1426" t="6523" r="5784" b="6725" baseLine="6677" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="6571" r="2381" b="6682">occurrence</wd>

<space/>

<wd l="2472" t="6523" r="2674" b="6682">of</wd>

<space/>

<wd l="2736" t="6571" r="2832" b="6682">a</wd>

<space/>

<wd l="2918" t="6523" r="3494" b="6725">capital</wd>

<space/>

<wd l="3586" t="6523" r="4032" b="6682">letter</wd>

<space/>

<wd l="4123" t="6523" r="4776" b="6725">starting</wd>

<space/>

<wd l="4858" t="6523" r="5122" b="6682">the</wd>

<space/>

<wd l="5203" t="6571" r="5784" b="6725">proper</wd>

<space/>

</ln>

<ln l="1421" t="6778" r="5784" b="6979" baseLine="6926" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="6826" r="1997" b="6970">nouns,</wd>

<space/>

<wd l="2083" t="6778" r="2400" b="6936">and</wd>

<space/>

<wd l="2472" t="6778" r="2741" b="6936">the</wd>

<space/>

<wd l="2818" t="6778" r="3557" b="6979">problem</wd>

<space/>

<wd l="3634" t="6778" r="3773" b="6936">is</wd>

<space/>

<wd l="3859" t="6778" r="4027" b="6931">in</wd>

<space/>

<wd l="4114" t="6778" r="5136" b="6979">discovering</wd>

<space/>

<wd l="5208" t="6826" r="5784" b="6979">proper</wd>

<space/>

</ln>

<ln l="1421" t="7027" r="5779" b="7229" baseLine="7181" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="7075" r="1944" b="7186">nouns</wd>

<space/>

<wd l="2040" t="7027" r="2371" b="7186">that</wd>

<space/>

<wd l="2472" t="7075" r="2731" b="7186">are</wd>

<space/>

<wd l="2832" t="7046" r="3110" b="7186">not</wd>

<space/>

<wd l="3211" t="7027" r="4205" b="7229">capitalized.</wd>

<space/>

<wd l="4315" t="7027" r="4718" b="7186">That</wd>

<space/>

<wd l="4814" t="7027" r="4954" b="7186">is</wd>

<space/>

<wd l="5054" t="7027" r="5434" b="7229">why</wd>

<space/>

<wd l="5525" t="7075" r="5779" b="7186">we</wd>

<space/>

</ln>

<ln l="1421" t="7282" r="5794" b="7483" baseLine="7435" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="7282" r="1834" b="7440">have</wd>

<space/>

<wd l="2040" t="7282" r="2741" b="7483">adopted</wd>

<space/>

<wd l="2942" t="7330" r="3038" b="7440">a</wd>

<space/>

<wd l="3245" t="7282" r="3898" b="7440">domain</wd>

<space/>

<wd l="4109" t="7282" r="4426" b="7440">and</wd>

<space/>

<wd l="4627" t="7282" r="5794" b="7440">lexical-based</wd>

<space/>

</ln>

<ln l="1426" t="7536" r="2270" b="7738" baseLine="7685" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="7536" r="2270" b="7738">approach.</wd>

</ln>

</para>

<para l="1426" t="8002" r="4272" b="8218" alignment="left" spaceBefore="207" lsp="exactly" lspExact="279" language="en">

<ln l="1426" t="8002" r="4272" b="8218" baseLine="8160" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="4">

<wd l="1426" t="8002" r="1594" b="8170">5.</wd>

<space/>

<wd l="1786" t="8002" r="3096" b="8218">UGCNormal</wd>

<space/>

<wd l="3158" t="8002" r="4272" b="8170">Evaluation</wd>

</ln>

</para>

<para l="1421" t="8424" r="5794" b="9130" alignment="justified" spaceBefore="152" lsp="exactly" lspExact="253" language="en">

<ln l="1421" t="8424" r="5774" b="8626" baseLine="8578" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="8429" r="1723" b="8582">We</wd>

<space/>

<wd l="1786" t="8424" r="2630" b="8582">evaluated</wd>

<space/>

<wd l="2678" t="8424" r="2947" b="8582">the</wd>

<space/>

<wd l="3005" t="8424" r="4224" b="8582">normalization</wd>

<space/>

<wd l="4282" t="8424" r="4618" b="8582">tool</wd>

<space/>

<wd l="4685" t="8424" r="5774" b="8626">intrinsically,</wd>

<space/>

</ln>

<ln l="1426" t="8678" r="5794" b="8880" baseLine="8827" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="8678" r="1589" b="8832">in</wd>

<space/>

<wd l="1642" t="8698" r="1968" b="8837">two</wd>

<space/>

<wd l="2030" t="8726" r="2659" b="8880">corpus,</wd>

<space/>

<wd l="2722" t="8678" r="3038" b="8837">and</wd>

<space/>

<wd l="3091" t="8678" r="4224" b="8880">extrinsically,</wd>

<space/>

<wd l="4286" t="8678" r="4454" b="8832">in</wd>

<space/>

<wd l="4512" t="8726" r="4608" b="8837">a</wd>

<space/>

<wd l="4656" t="8678" r="5054" b="8837">POS</wd>

<space/>

<wd l="5112" t="8698" r="5381" b="8880">tag</wd>

<space/>

<wd l="5434" t="8678" r="5794" b="8837">task</wd>

<space/>

</ln>

<ln l="1426" t="8928" r="3941" b="9130" baseLine="9082" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="8928" r="1742" b="9086">and</wd>

<space/>

<wd l="1795" t="8928" r="1963" b="9082">in</wd>

<space/>

<wd l="2021" t="8976" r="2227" b="9086">an</wd>

<space/>

<wd l="2285" t="8928" r="3000" b="9130">Opinion</wd>

<space/>

<wd l="3062" t="8928" r="3941" b="9086">Classifier.</wd>

</ln>

</para>

<para l="1426" t="9350" r="3691" b="9509" alignment="left" spaceBefore="168" lsp="exactly" lspExact="249" language="en">

<ln l="1426" t="9350" r="3691" b="9509" baseLine="9499" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="9350" r="1742" b="9509">5.1.</wd>

<space/>

<wd l="1805" t="9350" r="2611" b="9509">Intrinsic</wd>

<space/>

<wd l="2669" t="9350" r="3691" b="9509">Evaluation</wd>

</ln>

</para>

<para l="1421" t="9715" r="5808" b="11688" alignment="justified" spaceBefore="117" lsp="exactly" lspExact="252" language="en">

<ln l="1426" t="9715" r="5774" b="9917" baseLine="9869" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="9720" r="1598" b="9869">In</wd>

<space/>

<wd l="1670" t="9715" r="1939" b="9874">the</wd>

<space/>

<wd l="2021" t="9715" r="2736" b="9874">intrinsic</wd>

<space/>

<wd l="2822" t="9715" r="3734" b="9874">evaluation</wd>

<space/>

<wd l="3806" t="9763" r="4061" b="9874">we</wd>

<space/>

<wd l="4138" t="9715" r="4546" b="9874">used</wd>

<space/>

<wd l="4613" t="9734" r="4939" b="9874">two</wd>

<space/>

<wd l="5026" t="9715" r="5774" b="9917">samples,</wd>

<space/>

</ln>

<ln l="1426" t="9970" r="5789" b="10171" baseLine="10118" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="10018" r="1738" b="10128">one</wd>

<space/>

<wd l="1872" t="9970" r="2294" b="10128">from</wd>

<space/>

<wd l="2419" t="9970" r="2688" b="10128">the</wd>

<space/>

<wd l="2818" t="9970" r="3562" b="10171">Buscapé</wd>

<space/>

<wd l="3696" t="10018" r="4330" b="10171">corpus,</wd>

<space/>

<wd l="4469" t="9970" r="4790" b="10128">and</wd>

<space/>

<wd l="4915" t="10018" r="5227" b="10128">one</wd>

<space/>

<wd l="5362" t="9970" r="5789" b="10128">from</wd>

<space/>

</ln>

<ln l="1426" t="10224" r="5784" b="10426" baseLine="10373" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="10224" r="2083" b="10382">another</wd>

<space/>

<wd l="2146" t="10272" r="2722" b="10426">corpus</wd>

<space/>

<wd l="2794" t="10224" r="2995" b="10382">of</wd>

<space/>

<wd l="3034" t="10224" r="3302" b="10382">the</wd>

<space/>

<wd l="3374" t="10272" r="3816" b="10382">same</wd>

<space/>

<wd l="3883" t="10272" r="4421" b="10426">genre,</wd>

<space/>

<wd l="4493" t="10224" r="5299" b="10382">extracted</wd>

<space/>

<wd l="5362" t="10224" r="5784" b="10382">from</wd>

<space/>

</ln>

<ln l="1421" t="10474" r="5789" b="10666" baseLine="10627" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="10474" r="1685" b="10632">the</wd>

<space/>

<wd l="1790" t="10522" r="2870" b="10632">e-commerce</wd>

<space/>

<wd l="2971" t="10474" r="3638" b="10632">website</wd>

<space/>

<wd l="3739" t="10474" r="4517" b="10632">Mercado</wd>

<space/>

<wd l="4618" t="10474" r="5146" b="10666">Livre,</wd>

<space/>

<wd l="5251" t="10474" r="5789" b="10632">which</wd>

<space/>

</ln>

<ln l="1426" t="10728" r="5808" b="10930" baseLine="10882" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="10728" r="2357" b="10886">constitutes</wd>

<space/>

<wd l="2410" t="10776" r="3019" b="10886">unseen</wd>

<space/>

<wd l="3077" t="10728" r="3480" b="10886">data.</wd>

<space/>

<wd l="3547" t="10733" r="3720" b="10882">In</wd>

<space/>

<wd l="3768" t="10728" r="4166" b="10886">both</wd>

<space/>

<wd l="4219" t="10776" r="4728" b="10920">cases,</wd>

<space/>

<wd l="4790" t="10776" r="4886" b="10886">a</wd>

<space/>

<wd l="4939" t="10728" r="5554" b="10930">sample</wd>

<space/>

<wd l="5611" t="10728" r="5808" b="10886">of</wd>

<space/>

</ln>

<ln l="1430" t="10982" r="5784" b="11184" baseLine="11131" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="10982" r="1637" b="11141">60</wd>

<space/>

<wd l="1699" t="10982" r="2376" b="11184">product</wd>

<space/>

<wd l="2438" t="10982" r="3120" b="11141">reviews</wd>

<space/>

<wd l="3187" t="11030" r="3523" b="11141">was</wd>

<space/>

<wd l="3590" t="10982" r="4411" b="11184">manually</wd>

<space/>

<wd l="4478" t="10982" r="5333" b="11141">annotated</wd>

<space/>

<wd l="5395" t="10982" r="5784" b="11141">with</wd>

<space/>

</ln>

<ln l="1421" t="11232" r="5794" b="11434" baseLine="11386" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="11251" r="2045" b="11434">respect</wd>

<space/>

<wd l="2194" t="11251" r="2362" b="11390">to</wd>

<space/>

<wd l="2515" t="11232" r="3557" b="11434">punctuation</wd>

<space/>

<wd l="3715" t="11280" r="4277" b="11424">errors,</wd>

<space/>

<wd l="4440" t="11280" r="4814" b="11390">case</wd>

<space/>

<wd l="4963" t="11280" r="5309" b="11424">use,</wd>

<space/>

<wd l="5472" t="11232" r="5794" b="11390">and</wd>

<space/>

</ln>

<ln l="1421" t="11486" r="2563" b="11688" baseLine="11640" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="11486" r="2563" b="11688">misspellings.</wd>

</ln>

</para>

<para l="1426" t="11741" r="5789" b="12197" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="1651" t="11741" r="5789" b="11942" baseLine="11890" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="11741" r="1992" b="11899">Our</wd>

<space/>

<wd l="2059" t="11760" r="2386" b="11899">two</wd>

<space/>

<wd l="2467" t="11741" r="3163" b="11942">samples</wd>

<space/>

<wd l="3240" t="11741" r="3979" b="11942">(random</wd>

<space/>

<wd l="4056" t="11741" r="4829" b="11899">selection</wd>

<space/>

<wd l="4901" t="11741" r="5323" b="11899">from</wd>

<space/>

<wd l="5390" t="11741" r="5789" b="11899">both</wd>

<space/>

</ln>

<ln l="1426" t="11995" r="4378" b="12197" baseLine="12144" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="11995" r="2155" b="12197">corpora)</wd>

<space/>

<wd l="2222" t="12043" r="2486" b="12154">are</wd>

<space/>

<wd l="2549" t="11995" r="3389" b="12154">described</wd>

<space/>

<wd l="3446" t="11995" r="3614" b="12149">in</wd>

<space/>

<wd l="3672" t="11995" r="4166" b="12154">Table</wd>

<space/>

<wd l="4248" t="11995" r="4378" b="12154">1.</wd>

</ln>

</para>

<para l="1651" t="12499" r="4056" b="12701" alignment="left" li="216" spaceBefore="252" lsp="exactly" lspExact="247" language="en">

<ln l="1651" t="12499" r="4056" b="12701" baseLine="12648">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1651" t="12499" r="2146" b="12658">Table</wd>

<space/>

<wd l="2227" t="12499" r="2357" b="12658">1:</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2438" t="12499" r="3230" b="12701">Samples’</wd>

<space/>

<wd l="3312" t="12499" r="4056" b="12658">statistics</wd>

</run>

</ln>

</para>

<table l="1416" t="12706" r="5818" b="15336" alignment="left" li="8" ri="1" spaceAfter="91">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<gridTable>

<gridCol>2270</gridCol>

<gridCol>994</gridCol>

<gridCol>1138</gridCol>

<gridRow>917</gridRow>

<gridRow>307</gridRow>

<gridRow>312</gridRow>

<gridRow>307</gridRow>

<gridRow>471</gridRow>

<gridRow>316</gridRow>

</gridTable>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="1416" t="12706" r="3686" b="13623" language="en">

<ln l="0" t="0" r="0" b="0" baseLine="0" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable">

<nl orig="true"/>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="3754" t="12763" r="4464" b="13176" alignment="left" li="72" spaceAfter="438" lsp="exactly" lspExact="230" language="en">

<ln l="3754" t="12763" r="4464" b="12946" baseLine="12898" bold="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="3754" t="12763" r="4464" b="12946">Buscapé</wd>

<space/>

</ln>

<ln l="3763" t="12994" r="4387" b="13176" baseLine="13128" bold="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="3763" t="12994" r="4387" b="13176">Sample</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="4752" t="12768" r="5515" b="13406" alignment="left" li="72" spaceAfter="208" lsp="exactly" lspExact="230" language="en">

<ln l="4752" t="12768" r="5515" b="12907" baseLine="12898" bold="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="4752" t="12768" r="5515" b="12907">Mercado</wd>

<space/>

</ln>

<ln l="4752" t="12994" r="5213" b="13138" baseLine="13128" bold="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="4752" t="12994" r="5213" b="13138">Livre</wd>

<space/>

</ln>

<ln l="4762" t="13224" r="5381" b="13406" baseLine="13358" bold="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="4762" t="13224" r="5381" b="13406">Sample</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="1493" t="13738" r="2107" b="13882" alignment="left" li="77" spaceBefore="75" spaceAfter="6" lsp="exactly" lspExact="220" language="en">

<ln l="1493" t="13738" r="2107" b="13882" baseLine="13872" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1493" t="13738" r="2107" b="13882">reviews</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="4416" t="13738" r="4608" b="13882" alignment="right" ri="72" spaceBefore="75" spaceAfter="6" lsp="exactly" lspExact="220" language="en">

<ln l="4416" t="13738" r="4608" b="13882" baseLine="13872" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="4416" t="13738" r="4608" b="13882">60</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="5549" t="13738" r="5741" b="13882" alignment="right" ri="77" spaceBefore="75" spaceAfter="6" lsp="exactly" lspExact="220" language="en">

<ln l="5549" t="13738" r="5741" b="13882" baseLine="13872" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="5549" t="13738" r="5741" b="13882">60</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="1493" t="14045" r="2006" b="14189" alignment="left" li="77" spaceBefore="80" spaceAfter="11" lsp="exactly" lspExact="220" language="en">

<ln l="1493" t="14045" r="2006" b="14189" baseLine="14184" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1493" t="14045" r="2006" b="14189">tokens</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="4162" t="14045" r="4598" b="14218" alignment="right" ri="72" spaceBefore="80" spaceAfter="11" lsp="exactly" lspExact="220" language="en">

<ln l="4162" t="14045" r="4598" b="14218" baseLine="14184" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="4162" t="14045" r="4598" b="14218">3,179</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="5299" t="14045" r="5736" b="14218" alignment="right" ri="77" spaceBefore="80" spaceAfter="11" lsp="exactly" lspExact="220" language="en">

<ln l="5299" t="14045" r="5736" b="14218" baseLine="14184" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="5299" t="14045" r="5736" b="14218">3,897</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="1493" t="14357" r="3605" b="14539" alignment="left" li="77" spaceBefore="80" spaceAfter="2" lsp="exactly" lspExact="220" language="en">

<ln l="1493" t="14357" r="3605" b="14539" baseLine="14496" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1493" t="14357" r="2006" b="14501">tokens</wd>

<space/>

<wd l="2064" t="14357" r="2674" b="14501">without</wd>

<space/>

<wd l="2731" t="14357" r="3605" b="14539">stop-words</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="4157" t="14357" r="4584" b="14530" alignment="right" ri="72" spaceBefore="80" spaceAfter="2" lsp="exactly" lspExact="220" language="en">

<ln l="4157" t="14357" r="4584" b="14530" baseLine="14496" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="4157" t="14357" r="4584" b="14530">2,061</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="5294" t="14357" r="5736" b="14530" alignment="right" ri="77" spaceBefore="80" spaceAfter="2" lsp="exactly" lspExact="220" language="en">

<ln l="5294" t="14357" r="5736" b="14530" baseLine="14496" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="5294" t="14357" r="5736" b="14530">2,732</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="1493" t="14597" r="3605" b="15010" alignment="left" li="72" spaceAfter="9" lsp="exactly" lspExact="226" language="en">

<ln l="1493" t="14597" r="3605" b="14779" baseLine="14736" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1493" t="14597" r="2006" b="14741">tokens</wd>

<space/>

<wd l="2064" t="14597" r="2674" b="14741">without</wd>

<space/>

<wd l="2731" t="14597" r="3605" b="14779">stop-words</wd>

<space/>

</ln>

<ln l="1498" t="14827" r="3307" b="15010" baseLine="14962" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1498" t="14827" r="1781" b="14971">and</wd>

<space/>

<wd l="1824" t="14827" r="2774" b="15010">punctuation</wd>

<space/>

<wd l="2827" t="14827" r="3307" b="14971">marks</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="bottom">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="4176" t="14827" r="4594" b="15000" alignment="right" ri="72" spaceBefore="239" spaceAfter="11" lsp="exactly" lspExact="220" language="en">

<ln l="4176" t="14827" r="4594" b="15000" baseLine="14962" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="4176" t="14827" r="4594" b="15000">1,563</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="bottom">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="5314" t="14827" r="5736" b="15000" alignment="right" ri="77" spaceBefore="239" spaceAfter="11" lsp="exactly" lspExact="220" language="en">

<ln l="5314" t="14827" r="5736" b="15000" baseLine="14962" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="5314" t="14827" r="5736" b="15000">1,967</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="5" gridRowTill="5" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="1493" t="15144" r="1906" b="15322" alignment="left" li="77" spaceBefore="80" spaceAfter="14" lsp="exactly" lspExact="222" language="en">

<ln l="1493" t="15144" r="1906" b="15322" baseLine="15274" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1493" t="15158" r="1906" b="15322">types</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="5" gridRowTill="5" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="4320" t="15139" r="4603" b="15283" alignment="right" ri="72" spaceBefore="80" spaceAfter="16" lsp="exactly" lspExact="220" language="en">

<ln l="4320" t="15139" r="4603" b="15283" baseLine="15274" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="4320" t="15139" r="4603" b="15283">887</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="5" gridRowTill="5" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="10"/>

<topBorder type="single" width="10"/>

<rightBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="5314" t="15139" r="5741" b="15312" alignment="right" ri="77" spaceBefore="80" spaceAfter="16" lsp="exactly" lspExact="220" language="en">

<ln l="5314" t="15139" r="5741" b="15312" baseLine="15274" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="5314" t="15139" r="5741" b="15312">1,096</wd>

</ln>

</para>

</cell>

</table>

</column>

<column l="6119" t="1417" r="10530" b="15427">

<para l="6125" t="1464" r="10512" b="5462" alignment="justified" spaceBefore="2" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="6355" t="1464" r="10483" b="1666" baseLine="1613" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6355" t="1464" r="6854" b="1622">Table</wd>

<space/>

<wd l="6902" t="1464" r="7003" b="1618">2</wd>

<space/>

<wd l="7056" t="1464" r="7594" b="1622">shows</wd>

<space/>

<wd l="7637" t="1464" r="7906" b="1622">the</wd>

<space/>

<wd l="7949" t="1464" r="8434" b="1622">recall</wd>

<space/>

<wd l="8482" t="1464" r="9082" b="1666">figures</wd>

<space/>

<wd l="9134" t="1464" r="9331" b="1622">of</wd>

<space/>

<wd l="9350" t="1464" r="10483" b="1622">UGCNormal</wd>

<space/>

</ln>

<ln l="6130" t="1718" r="10483" b="1920" baseLine="1867" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="1718" r="6298" b="1872">in</wd>

<space/>

<wd l="6379" t="1718" r="6773" b="1877">both</wd>

<space/>

<wd l="6869" t="1718" r="7613" b="1920">samples.</wd>

<space/>

<wd l="7714" t="1718" r="8054" b="1877">The</wd>

<space/>

<wd l="8150" t="1718" r="8755" b="1877">second</wd>

<space/>

<wd l="8842" t="1718" r="9158" b="1877">and</wd>

<space/>

<wd l="9240" t="1718" r="9662" b="1877">third</wd>

<space/>

<wd l="9749" t="1718" r="10483" b="1877">columns</wd>

<space/>

</ln>

<ln l="6130" t="1968" r="10512" b="2160" baseLine="2122" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="1968" r="6773" b="2126">contain</wd>

<space/>

<wd l="6864" t="1968" r="7546" b="2160">X/Y=Z,</wd>

<space/>

<wd l="7646" t="1968" r="8184" b="2126">where</wd>

<space/>

<wd l="8280" t="1973" r="8438" b="2122">X</wd>

<space/>

<wd l="8539" t="1968" r="9077" b="2126">shows</wd>

<space/>

<wd l="9178" t="1968" r="9442" b="2126">the</wd>

<space/>

<wd l="9538" t="1968" r="10214" b="2126">number</wd>

<space/>

<wd l="10310" t="1968" r="10512" b="2126">of</wd>

<space/>

</ln>

<ln l="6130" t="2222" r="10512" b="2414" baseLine="2376" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="2222" r="6595" b="2381">items</wd>

<space/>

<wd l="6682" t="2242" r="6850" b="2381">to</wd>

<space/>

<wd l="6926" t="2222" r="7133" b="2381">be</wd>

<space/>

<wd l="7214" t="2222" r="8251" b="2414">normalized,</wd>

<space/>

<wd l="8338" t="2227" r="8496" b="2376">Y</wd>

<space/>

<wd l="8587" t="2222" r="9125" b="2381">shows</wd>

<space/>

<wd l="9206" t="2222" r="9475" b="2381">the</wd>

<space/>

<wd l="9557" t="2222" r="10229" b="2381">number</wd>

<space/>

<wd l="10310" t="2222" r="10512" b="2381">of</wd>

<space/>

</ln>

<ln l="6130" t="2477" r="10488" b="2678" baseLine="2626" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="2477" r="6907" b="2678">correctly</wd>

<space/>

<wd l="7037" t="2477" r="8035" b="2635">normalized</wd>

<space/>

<wd l="8165" t="2477" r="8683" b="2669">items,</wd>

<space/>

<wd l="8827" t="2477" r="9144" b="2635">and</wd>

<space/>

<wd l="9274" t="2482" r="9403" b="2630">Z</wd>

<space/>

<wd l="9547" t="2477" r="10085" b="2635">shows</wd>

<space/>

<wd l="10219" t="2477" r="10488" b="2635">the</wd>

<space/>

</ln>

<ln l="6130" t="2726" r="10488" b="2928" baseLine="2880" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="2726" r="7387" b="2928">corresponding</wd>

<space/>

<wd l="7502" t="2774" r="8280" b="2928">accuracy</wd>

<space/>

<wd l="8395" t="2746" r="8774" b="2885">rate.</wd>

<space/>

<wd l="8904" t="2726" r="9144" b="2885">As</wd>

<space/>

<wd l="9274" t="2726" r="10099" b="2928">expected,</wd>

<space/>

<wd l="10224" t="2726" r="10488" b="2885">the</wd>

<space/>

</ln>

<ln l="6125" t="2981" r="10483" b="3182" baseLine="3134" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="2981" r="6696" b="3139">results</wd>

<space/>

<wd l="6758" t="2981" r="6926" b="3134">in</wd>

<space/>

<wd l="6979" t="2981" r="7248" b="3139">the</wd>

<space/>

<wd l="7306" t="2981" r="8045" b="3182">Buscapé</wd>

<space/>

<wd l="8107" t="3029" r="8683" b="3182">corpus</wd>

<space/>

<wd l="8750" t="2981" r="9226" b="3182">(used</wd>

<space/>

<wd l="9278" t="2981" r="9533" b="3139">for</wd>

<space/>

<wd l="9590" t="2981" r="10483" b="3182">diagnosis)</wd>

<space/>

</ln>

<ln l="6130" t="3235" r="10488" b="3427" baseLine="3384" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="3283" r="6394" b="3394">are</wd>

<space/>

<wd l="6470" t="3235" r="6979" b="3394">better</wd>

<space/>

<wd l="7056" t="3235" r="7430" b="3394">than</wd>

<space/>

<wd l="7522" t="3235" r="7690" b="3389">in</wd>

<space/>

<wd l="7771" t="3235" r="8549" b="3394">Mercado</wd>

<space/>

<wd l="8635" t="3235" r="9158" b="3427">Livre,</wd>

<space/>

<wd l="9245" t="3235" r="9946" b="3394">because</wd>

<space/>

<wd l="10037" t="3283" r="10488" b="3394">some</wd>

<space/>

</ln>

<ln l="6130" t="3490" r="10478" b="3691" baseLine="3638" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="3490" r="6706" b="3648">lexical</wd>

<space/>

<wd l="6782" t="3538" r="7608" b="3648">resources</wd>

<space/>

<wd l="7690" t="3538" r="8112" b="3648">were</wd>

<space/>

<wd l="8194" t="3490" r="9206" b="3648">constructed</wd>

<space/>

<wd l="9283" t="3490" r="9706" b="3648">from</wd>

<space/>

<wd l="9782" t="3490" r="10478" b="3691">analysis</wd>

<space/>

</ln>

<ln l="6130" t="3739" r="10483" b="3941" baseLine="3893" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="3739" r="6331" b="3898">of</wd>

<space/>

<wd l="6442" t="3739" r="6907" b="3898">OOV</wd>

<space/>

<wd l="7037" t="3739" r="7574" b="3898">words</wd>

<space/>

<wd l="7709" t="3739" r="7877" b="3893">in</wd>

<space/>

<wd l="8002" t="3739" r="8789" b="3941">Buscapé.</wd>

<space/>

<wd l="8933" t="3744" r="9110" b="3893">In</wd>

<space/>

<wd l="9245" t="3739" r="9653" b="3941">spite</wd>

<space/>

<wd l="9787" t="3739" r="9989" b="3898">of</wd>

<space/>

<wd l="10090" t="3739" r="10483" b="3898">both</wd>

<space/>

</ln>

<ln l="6134" t="3994" r="10488" b="4195" baseLine="4142" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6134" t="3994" r="6826" b="4195">samples</wd>

<space/>

<wd l="6898" t="3994" r="7498" b="4195">having</wd>

<space/>

<wd l="7565" t="3994" r="7834" b="4152">the</wd>

<space/>

<wd l="7910" t="4042" r="8352" b="4152">same</wd>

<space/>

<wd l="8419" t="3994" r="9096" b="4152">number</wd>

<space/>

<wd l="9163" t="3994" r="9365" b="4152">of</wd>

<space/>

<wd l="9413" t="3994" r="10147" b="4186">reviews,</wd>

<space/>

<wd l="10224" t="3994" r="10488" b="4152">the</wd>

<space/>

</ln>

<ln l="6125" t="4248" r="10493" b="4450" baseLine="4397" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="4248" r="6902" b="4406">Mercado</wd>

<space/>

<wd l="7027" t="4248" r="7502" b="4406">Livre</wd>

<space/>

<wd l="7637" t="4248" r="8251" b="4450">sample</wd>

<space/>

<wd l="8381" t="4248" r="9106" b="4406">contains</wd>

<space/>

<wd l="9226" t="4248" r="10493" b="4450">proportionally</wd>

<space/>

</ln>

<ln l="6125" t="4498" r="10488" b="4699" baseLine="4651" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="4546" r="6571" b="4656">more</wd>

<space/>

<wd l="6677" t="4498" r="7142" b="4656">items</wd>

<space/>

<wd l="7243" t="4517" r="7411" b="4656">to</wd>

<space/>

<wd l="7507" t="4498" r="7718" b="4656">be</wd>

<space/>

<wd l="7814" t="4498" r="8808" b="4656">normalized</wd>

<space/>

<wd l="8899" t="4498" r="9283" b="4656">than</wd>

<space/>

<wd l="9379" t="4498" r="9643" b="4656">the</wd>

<space/>

<wd l="9744" t="4498" r="10488" b="4699">Buscapé</wd>

<space/>

</ln>

<ln l="6134" t="4752" r="10483" b="4954" baseLine="4906" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6134" t="4752" r="6797" b="4954">sample,</wd>

<space/>

<wd l="6878" t="4752" r="7210" b="4910">that</wd>

<space/>

<wd l="7286" t="4752" r="7474" b="4944">is,</wd>

<space/>

<wd l="7555" t="4752" r="7819" b="4910">the</wd>

<space/>

<wd l="7896" t="4752" r="8573" b="4910">reviews</wd>

<space/>

<wd l="8659" t="4752" r="9082" b="4910">from</wd>

<space/>

<wd l="9154" t="4752" r="9931" b="4910">Mercado</wd>

<space/>

<wd l="10013" t="4752" r="10483" b="4910">Livre</wd>

<space/>

</ln>

<ln l="6130" t="5006" r="10488" b="5208" baseLine="5155" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="5006" r="6758" b="5165">deviate</wd>

<space/>

<wd l="6840" t="5054" r="7286" b="5165">more</wd>

<space/>

<wd l="7373" t="5006" r="7795" b="5165">from</wd>

<space/>

<wd l="7882" t="5006" r="8621" b="5165">standard</wd>

<space/>

<wd l="8702" t="5006" r="9485" b="5208">language</wd>

<space/>

<wd l="9566" t="5006" r="9950" b="5165">than</wd>

<space/>

<wd l="10022" t="5006" r="10488" b="5165">those</wd>

<space/>

</ln>

<ln l="6130" t="5261" r="7397" b="5462" baseLine="5410" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="5261" r="6557" b="5419">from</wd>

<space/>

<wd l="6605" t="5261" r="7397" b="5462">Buscapé.</wd>

</ln>

</para>

<para l="6125" t="5510" r="10512" b="7229" alignment="justified" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="6350" t="5510" r="10488" b="5712" baseLine="5664" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="5515" r="6662" b="5669">For</wd>

<space/>

<wd l="6787" t="5510" r="7056" b="5669">the</wd>

<space/>

<wd l="7190" t="5510" r="8285" b="5712">misspellings</wd>

<space/>

<wd l="8419" t="5510" r="8981" b="5669">whose</wd>

<space/>

<wd l="9120" t="5510" r="10085" b="5669">corrections</wd>

<space/>

<wd l="10229" t="5558" r="10488" b="5669">are</wd>

<space/>

</ln>

<ln l="6130" t="5765" r="10512" b="5957" baseLine="5914" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="5765" r="7238" b="5957">context-free,</wd>

<space/>

<wd l="7354" t="5765" r="8486" b="5923">UGCNormal</wd>

<space/>

<wd l="8606" t="5765" r="9389" b="5923">achieved</wd>

<space/>

<wd l="9504" t="5813" r="9600" b="5923">a</wd>

<space/>

<wd l="9706" t="5765" r="10190" b="5923">recall</wd>

<space/>

<wd l="10310" t="5765" r="10512" b="5923">of</wd>

<space/>

</ln>

<ln l="6134" t="6019" r="10488" b="6221" baseLine="6168" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="6019" r="6523" b="6182">89%</wd>

<space/>

<wd l="6643" t="6019" r="6811" b="6173">in</wd>

<space/>

<wd l="6922" t="6019" r="7666" b="6221">Buscapé</wd>

<space/>

<wd l="7786" t="6067" r="8362" b="6221">corpus</wd>

<space/>

<wd l="8486" t="6019" r="8803" b="6178">and</wd>

<space/>

<wd l="8918" t="6019" r="9307" b="6182">80%</wd>

<space/>

<wd l="9432" t="6019" r="9595" b="6173">in</wd>

<space/>

<wd l="9710" t="6019" r="10488" b="6178">Mercado</wd>

<space/>

</ln>

<ln l="6125" t="6269" r="10488" b="6470" baseLine="6422" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="6269" r="6600" b="6427">Livre</wd>

<space/>

<wd l="6682" t="6317" r="7310" b="6470">corpus.</wd>

<space/>

<wd l="7402" t="6269" r="7786" b="6427">This</wd>

<space/>

<wd l="7872" t="6269" r="8755" b="6427">difference</wd>

<space/>

<wd l="8837" t="6317" r="9211" b="6470">may</wd>

<space/>

<wd l="9283" t="6269" r="9494" b="6427">be</wd>

<space/>

<wd l="9576" t="6269" r="9888" b="6427">due</wd>

<space/>

<wd l="9970" t="6288" r="10138" b="6427">to</wd>

<space/>

<wd l="10219" t="6269" r="10488" b="6427">the</wd>

<space/>

</ln>

<ln l="6134" t="6523" r="10507" b="6725" baseLine="6677" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="6523" r="6595" b="6682">small</wd>

<space/>

<wd l="6710" t="6523" r="7042" b="6682">size</wd>

<space/>

<wd l="7152" t="6523" r="7354" b="6682">of</wd>

<space/>

<wd l="7430" t="6523" r="7824" b="6682">both</wd>

<space/>

<wd l="7934" t="6523" r="8630" b="6725">samples</wd>

<space/>

<wd l="8741" t="6523" r="9062" b="6682">and</wd>

<space/>

<wd l="9158" t="6523" r="9427" b="6682">the</wd>

<space/>

<wd l="9528" t="6523" r="10200" b="6682">number</wd>

<space/>

<wd l="10306" t="6523" r="10507" b="6682">of</wd>

<space/>

</ln>

<ln l="6125" t="6778" r="10488" b="6979" baseLine="6926" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="6778" r="7219" b="6979">misspellings</wd>

<space/>

<wd l="7306" t="6778" r="7550" b="6979">(in</wd>

<space/>

<wd l="7627" t="6778" r="8405" b="6936">Mercado</wd>

<space/>

<wd l="8482" t="6778" r="8957" b="6936">Livre</wd>

<space/>

<wd l="9038" t="6778" r="9475" b="6936">there</wd>

<space/>

<wd l="9557" t="6826" r="9821" b="6936">are</wd>

<space/>

<wd l="9907" t="6778" r="10488" b="6936">almost</wd>

<space/>

</ln>

<ln l="6125" t="7027" r="9922" b="7229" baseLine="7181" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="7027" r="6600" b="7186">twice</wd>

<space/>

<wd l="6662" t="7075" r="6835" b="7186">as</wd>

<space/>

<wd l="6893" t="7075" r="7382" b="7229">many</wd>

<space/>

<wd l="7435" t="7027" r="8530" b="7229">misspellings</wd>

<space/>

<wd l="8597" t="7075" r="8770" b="7186">as</wd>

<space/>

<wd l="8837" t="7027" r="9005" b="7181">in</wd>

<space/>

<wd l="9058" t="7027" r="9922" b="7229">Buscapé).</wd>

</ln>

</para>

<para l="6130" t="7560" r="10493" b="8266" alignment="justified" spaceBefore="278" spaceAfter="249" lsp="exactly" lspExact="253" language="en">

<ln l="6130" t="7560" r="10493" b="7718" baseLine="7709" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="7560" r="6624" b="7718">Table</wd>

<space/>

<wd l="6686" t="7560" r="6835" b="7718">2:</wd>

<space/>

<wd l="6907" t="7560" r="7973" b="7718">Distribution</wd>

<space/>

<wd l="8030" t="7560" r="8232" b="7718">of</wd>

<space/>

<wd l="8266" t="7608" r="8770" b="7718">errors</wd>

<space/>

<wd l="8832" t="7560" r="9149" b="7718">and</wd>

<space/>

<wd l="9206" t="7560" r="10171" b="7718">corrections</wd>

<space/>

<wd l="10234" t="7560" r="10493" b="7718">for</wd>

<space/>

</ln>

<ln l="6130" t="7810" r="10488" b="8011" baseLine="7963" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="7810" r="6528" b="7968">each</wd>

<space/>

<wd l="6595" t="7810" r="7051" b="7968">UGC</wd>

<space/>

<wd l="7133" t="7810" r="7795" b="8011">sample,</wd>

<space/>

<wd l="7872" t="7810" r="8189" b="7968">and</wd>

<space/>

<wd l="8251" t="7810" r="8515" b="7968">the</wd>

<space/>

<wd l="8578" t="7810" r="9067" b="7968">recall</wd>

<space/>

<wd l="9134" t="7810" r="9691" b="7968">values</wd>

<space/>

<wd l="9768" t="7810" r="10022" b="7968">for</wd>

<space/>

<wd l="10090" t="7810" r="10488" b="7968">each</wd>

<space/>

</ln>

<ln l="6130" t="8064" r="7027" b="8266" baseLine="8218" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="8112" r="6557" b="8222">error</wd>

<space/>

<wd l="6605" t="8083" r="7027" b="8266">type.</wd>

</ln>

</para>

<table l="6120" t="8544" r="10488" b="13877" alignment="left" li="1" ri="42" spaceBefore="14" spaceAfter="279">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<gridTable>

<gridCol>1138</gridCol>

<gridCol>1219</gridCol>

<gridCol>1334</gridCol>

<gridCol>677</gridCol>

<gridRow>648</gridRow>

<gridRow>638</gridRow>

<gridRow>644</gridRow>

<gridRow>638</gridRow>

<gridRow>1051</gridRow>

<gridRow>845</gridRow>

<gridRow>432</gridRow>

<gridRow>437</gridRow>

</gridTable>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6283" t="8712" r="7099" b="8880" alignment="left" li="110" spaceBefore="132" spaceAfter="300" lsp="exactly" lspExact="211" language="en">

<ln l="6283" t="8712" r="7099" b="8880" baseLine="8832" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6283" t="8717" r="6730" b="8842">Error</wd>

<space/>

<wd l="6773" t="8722" r="7099" b="8880">type</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="7546" t="8712" r="8184" b="8880" alignment="right" ri="291" spaceBefore="132" spaceAfter="300" lsp="exactly" lspExact="211" language="en">

<ln l="7546" t="8712" r="8184" b="8880" baseLine="8832" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="7546" t="8712" r="8184" b="8880">Buscapé</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="8798" t="8717" r="9485" b="9048" alignment="centered" spaceBefore="137" spaceAfter="94" lsp="exactly" lspExact="206" language="en">

<ln l="8798" t="8717" r="9485" b="8842" baseLine="8832" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="8798" t="8717" r="9485" b="8842">Mercado
</wd>

</ln>

<ln l="8933" t="8918" r="9346" b="9048" baseLine="9038" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="8933" t="8918" r="9346" b="9048">Livre</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="9826" t="8712" r="10459" b="8880" alignment="centered" spaceBefore="132" spaceAfter="300" lsp="exactly" lspExact="211" language="en">

<ln l="9826" t="8712" r="10459" b="8880" baseLine="8832" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-8">

<wd l="9826" t="8712" r="10459" b="8880">Average</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6235" t="9346" r="7133" b="9720" alignment="left" li="108" spaceBefore="119" spaceAfter="97" lsp="exactly" lspExact="206" language="en">

<ln l="6240" t="9346" r="6864" b="9475" baseLine="9466" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6240" t="9384" r="6864" b="9475">common</wd>

<space/>

</ln>

<ln l="6235" t="9547" r="7133" b="9720" baseLine="9672" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6235" t="9547" r="7133" b="9720">misspellings</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="7459" t="9350" r="8366" b="9480" alignment="left" spaceBefore="122" spaceAfter="295" lsp="exactly" lspExact="211" language="en">

<tabs position="7459"/>

<ln l="7459" t="9350" r="8366" b="9480" baseLine="9470" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="7459" t="9350" r="7862" b="9480">50/56</wd>

<space/>

<wd l="7910" t="9394" r="8011" b="9437">=</wd>

<space/>

<wd l="8064" t="9350" r="8366" b="9480">0.89</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="8702" t="9350" r="9701" b="9480" alignment="left" spaceBefore="122" spaceAfter="295" lsp="exactly" lspExact="211" language="en">

<tabs position="8702"/>

<ln l="8702" t="9350" r="9701" b="9480" baseLine="9470" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="8702" t="9350" r="9192" b="9480">87/108</wd>

<space/>

<wd l="9245" t="9394" r="9346" b="9437">=</wd>

<space/>

<wd l="9394" t="9350" r="9701" b="9480">0.80</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="9994" t="9350" r="10301" b="9480" alignment="left" spaceBefore="122" spaceAfter="295" lsp="exactly" lspExact="211" language="en">

<tabs position="9994"/>

<ln l="9994" t="9350" r="10301" b="9480" baseLine="9470" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="9994" t="9350" r="10301" b="9480">0.84</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6235" t="9979" r="7133" b="10358" alignment="left" li="108" spaceBefore="114" spaceAfter="102" lsp="exactly" lspExact="211" language="en">

<ln l="6235" t="9979" r="6936" b="10114" baseLine="10104" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6235" t="9979" r="6936" b="10114">real-word</wd>

<space/>

</ln>

<ln l="6235" t="10186" r="7133" b="10358" baseLine="10315" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6235" t="10186" r="7133" b="10358">misspellings</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="7464" t="9989" r="8366" b="10118" alignment="left" spaceBefore="123" spaceAfter="304" lsp="exactly" lspExact="211" language="en">

<tabs position="7464"/>

<ln l="7464" t="9989" r="8366" b="10118" baseLine="10109" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="7464" t="9989" r="7862" b="10118">15/39</wd>

<space/>

<wd l="7910" t="10032" r="8011" b="10075">=</wd>

<space/>

<wd l="8064" t="9989" r="8366" b="10118">0.38</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="8789" t="9989" r="9691" b="10118" alignment="left" spaceBefore="123" spaceAfter="304" lsp="exactly" lspExact="211" language="en">

<tabs position="8789"/>

<ln l="8789" t="9989" r="9691" b="10118" baseLine="10109" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="8789" t="9989" r="9197" b="10118">24/76</wd>

<space/>

<wd l="9245" t="10032" r="9346" b="10075">=</wd>

<space/>

<wd l="9398" t="9989" r="9691" b="10118">0.31</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="9994" t="9989" r="10301" b="10118" alignment="left" spaceBefore="123" spaceAfter="304" lsp="exactly" lspExact="211" language="en">

<tabs position="9994"/>

<ln l="9994" t="9989" r="10301" b="10118" baseLine="10109" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="9994" t="9989" r="10301" b="10118">0.34</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6240" t="10618" r="6792" b="10997" alignment="left" li="108" ri="468" spaceBefore="119" spaceAfter="97" lsp="exactly" lspExact="206" language="en">

<ln l="6240" t="10618" r="6792" b="10752" baseLine="10747" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6240" t="10618" r="6792" b="10752">internet</wd>

<space/>

</ln>

<ln l="6240" t="10824" r="6619" b="10997" baseLine="10954" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6240" t="10824" r="6619" b="10997">slang</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="7632" t="10627" r="8366" b="10757" alignment="left" spaceBefore="122" spaceAfter="295" lsp="exactly" lspExact="211" language="en">

<tabs position="7632"/>

<ln l="7632" t="10627" r="8366" b="10757" baseLine="10752" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="7632" t="10627" r="7862" b="10757">4/6</wd>

<space/>

<wd l="7910" t="10670" r="8011" b="10714">=</wd>

<space/>

<wd l="8064" t="10627" r="8366" b="10757">0.67</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="8798" t="10627" r="9701" b="10757" alignment="left" spaceBefore="122" spaceAfter="295" lsp="exactly" lspExact="211" language="en">

<tabs position="8798"/>

<ln l="8798" t="10627" r="9701" b="10757" baseLine="10752" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="8798" t="10627" r="9197" b="10757">15/25</wd>

<space/>

<wd l="9245" t="10670" r="9346" b="10714">=</wd>

<space/>

<wd l="9398" t="10627" r="9701" b="10757">0.60</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="9994" t="10627" r="10291" b="10757" alignment="left" spaceBefore="122" spaceAfter="295" lsp="exactly" lspExact="211" language="en">

<tabs position="9994"/>

<ln l="9994" t="10627" r="10291" b="10757" baseLine="10752" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="9994" t="10627" r="10291" b="10757">0.61</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6235" t="11266" r="7003" b="12053" alignment="left" li="108" ri="252" spaceBefore="120" spaceAfter="97" lsp="exactly" lspExact="206" language="en">

<ln l="6240" t="11266" r="6826" b="11390" baseLine="11386" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6240" t="11299" r="6542" b="11390">case</wd>

<space/>

<wd l="6590" t="11299" r="6826" b="11390">use</wd>

<space/>

</ln>

<ln l="6240" t="11462" r="6768" b="11630" baseLine="11592" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6240" t="11462" r="6768" b="11630">(proper</wd>

<space/>

</ln>

<ln l="6235" t="11669" r="7003" b="11803" baseLine="11798" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6235" t="11712" r="6691" b="11803">names</wd>

<space/>

<wd l="6744" t="11669" r="7003" b="11803">and</wd>

<space/>

</ln>

<ln l="6240" t="11880" r="6989" b="12053" baseLine="12005" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6240" t="11880" r="6989" b="12053">acronyms)</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="7464" t="11266" r="8362" b="11395" alignment="left" spaceBefore="122" spaceAfter="708" lsp="exactly" lspExact="211" language="en">

<tabs position="7464"/>

<ln l="7464" t="11266" r="8362" b="11395" baseLine="11390" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="7464" t="11266" r="7858" b="11395">11/12</wd>

<space/>

<wd l="7910" t="11309" r="8011" b="11352">=</wd>

<space/>

<wd l="8064" t="11266" r="8362" b="11395">0.92</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="8798" t="11266" r="9701" b="11395" alignment="left" spaceBefore="122" spaceAfter="708" lsp="exactly" lspExact="211" language="en">

<tabs position="8798"/>

<ln l="8798" t="11266" r="9701" b="11395" baseLine="11390" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="8798" t="11266" r="9197" b="11395">13/19</wd>

<space/>

<wd l="9245" t="11309" r="9346" b="11352">=</wd>

<space/>

<wd l="9398" t="11266" r="9701" b="11395">0.68</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="9994" t="11266" r="10301" b="11395" alignment="left" spaceBefore="122" spaceAfter="708" lsp="exactly" lspExact="211" language="en">

<tabs position="9994"/>

<ln l="9994" t="11266" r="10301" b="11395" baseLine="11390" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="9994" t="11266" r="10301" b="11395">0.77</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="5" gridRowTill="5" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6240" t="12322" r="6907" b="12893" alignment="centered" spaceBefore="120" spaceAfter="102" lsp="exactly" lspExact="206" language="en">

<ln l="6240" t="12322" r="6826" b="12446" baseLine="12437" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="6240" t="12355" r="6542" b="12446">case</wd>

<space/>

<wd l="6590" t="12355" r="6826" b="12446">use
</wd>

</ln>

<ln l="6240" t="12518" r="6826" b="12686" baseLine="12643" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="6240" t="12518" r="6610" b="12686">(start</wd>

<space/>

<wd l="6658" t="12523" r="6826" b="12653">of
</wd>

</ln>

<ln l="6240" t="12725" r="6907" b="12893" baseLine="12850" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="6240" t="12725" r="6907" b="12893">sentence)</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="5" gridRowTill="5" alignment="decimal" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="7464" t="12322" r="8366" b="12451" alignment="left" spaceBefore="123" spaceAfter="506" lsp="exactly" lspExact="211" language="en">

<tabs position="7464"/>

<ln l="7464" t="12322" r="8366" b="12451" baseLine="12442" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="7464" t="12322" r="7862" b="12451">14/14</wd>

<space/>

<wd l="7910" t="12365" r="8011" b="12408">=</wd>

<space/>

<wd l="8069" t="12322" r="8366" b="12451">1.00</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="5" gridRowTill="5" alignment="decimal" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="8880" t="12322" r="9701" b="12451" alignment="left" spaceBefore="123" spaceAfter="506" lsp="exactly" lspExact="211" language="en">

<tabs position="8880"/>

<ln l="8880" t="12322" r="9701" b="12451" baseLine="12442" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="8880" t="12322" r="9187" b="12451">7/12</wd>

<space/>

<wd l="9245" t="12365" r="9346" b="12408">=</wd>

<space/>

<wd l="9394" t="12322" r="9701" b="12451">0.58</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="5" gridRowTill="5" alignment="decimal" verticalAlignment="top">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="9994" t="12322" r="10291" b="12451" alignment="left" spaceBefore="123" spaceAfter="506" lsp="exactly" lspExact="211" language="en">

<tabs position="9994"/>

<ln l="9994" t="12322" r="10291" b="12451" baseLine="12442" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="9994" t="12322" r="10291" b="12451">0.81</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="6" gridRowTill="6" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6240" t="13157" r="7118" b="13330" alignment="left" li="110" spaceBefore="119" spaceAfter="102" lsp="exactly" lspExact="206" language="en">

<ln l="6240" t="13157" r="7118" b="13330" baseLine="13282" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6240" t="13157" r="6638" b="13330">glued</wd>

<space/>

<wd l="6682" t="13157" r="7118" b="13291">words</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="6" gridRowTill="6" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="7862" t="13166" r="8366" b="13296" alignment="right" ri="111" spaceBefore="122" spaceAfter="94" lsp="exactly" lspExact="211" language="en">

<ln l="7862" t="13166" r="8366" b="13296" baseLine="13286" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="7862" t="13166" r="8078" b="13296">0/2</wd>

<space/>

<wd l="8131" t="13210" r="8232" b="13253">=</wd>

<space/>

<wd l="8285" t="13166" r="8366" b="13296">0</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="6" gridRowTill="6" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="8966" t="13166" r="9696" b="13296" alignment="left" spaceBefore="122" spaceAfter="94" lsp="exactly" lspExact="211" language="en">

<tabs position="8966"/>

<ln l="8966" t="13166" r="9696" b="13296" baseLine="13286" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="8966" t="13166" r="9197" b="13296">2/6</wd>

<space/>

<wd l="9245" t="13210" r="9346" b="13253">=</wd>

<space/>

<wd l="9398" t="13166" r="9696" b="13296">0.33</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="6" gridRowTill="6" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="9994" t="13166" r="10301" b="13296" alignment="left" spaceBefore="122" spaceAfter="94" lsp="exactly" lspExact="211" language="en">

<tabs position="9994"/>

<ln l="9994" t="13166" r="10301" b="13296" baseLine="13286" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="9994" t="13166" r="10301" b="13296">0.25</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="7" gridRowTill="7" alignment="left" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="6230" t="13589" r="7085" b="13757" alignment="left" li="110" spaceBefore="119" spaceAfter="102" lsp="exactly" lspExact="206" language="en">

<ln l="6230" t="13589" r="7085" b="13757" baseLine="13714" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6230" t="13589" r="7085" b="13757">punctuation</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="7" gridRowTill="7" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="7454" t="13598" r="8366" b="13728" alignment="left" spaceBefore="122" spaceAfter="94" lsp="exactly" lspExact="211" language="en">

<tabs position="7454"/>

<ln l="7454" t="13598" r="8366" b="13728" baseLine="13718" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="7454" t="13598" r="7862" b="13728">44/47</wd>

<space/>

<wd l="7910" t="13642" r="8011" b="13685">=</wd>

<space/>

<wd l="8064" t="13598" r="8366" b="13728">0.94</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="7" gridRowTill="7" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="8794" t="13598" r="9696" b="13728" alignment="left" spaceBefore="122" spaceAfter="94" lsp="exactly" lspExact="211" language="en">

<tabs position="8794"/>

<ln l="8794" t="13598" r="9696" b="13728" baseLine="13718" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="8794" t="13598" r="9197" b="13728">58/79</wd>

<space/>

<wd l="9245" t="13642" r="9346" b="13685">=</wd>

<space/>

<wd l="9398" t="13598" r="9696" b="13728">0.73</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="7" gridRowTill="7" alignment="decimal" verticalAlignment="middle">

<leftBorder type="single" width="14"/>

<topBorder type="single" width="14"/>

<rightBorder type="single" width="14"/>

<bottomBorder type="single" width="14"/>

<para l="9994" t="13598" r="10291" b="13728" alignment="left" spaceBefore="122" spaceAfter="94" lsp="exactly" lspExact="211" language="en">

<tabs position="9994"/>

<ln l="9994" t="13598" r="10291" b="13728" baseLine="13718" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="9994" t="13598" r="10291" b="13728">0.81</wd>

</ln>

</para>

</cell>

</table>

<para l="6120" t="14203" r="10488" b="15418" alignment="justified" spaceBefore="1" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="6350" t="14203" r="10488" b="14405" baseLine="14352" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="14208" r="6658" b="14362">We</wd>

<space/>

<wd l="6710" t="14203" r="7555" b="14362">evaluated</wd>

<space/>

<wd l="7598" t="14203" r="7867" b="14362">the</wd>

<space/>

<wd l="7915" t="14203" r="8275" b="14362">task</wd>

<space/>

<wd l="8318" t="14203" r="8779" b="14362">noise</wd>

<space/>

<wd l="8827" t="14203" r="9542" b="14362">removal</wd>

<space/>

<wd l="9600" t="14203" r="9768" b="14357">in</wd>

<space/>

<wd l="9821" t="14251" r="9917" b="14362">a</wd>

<space/>

<wd l="9974" t="14203" r="10488" b="14405">single</wd>

<space/>

</ln>

<ln l="6120" t="14458" r="10483" b="14659" baseLine="14606" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="14506" r="6552" b="14659">pass,</wd>

<space/>

<wd l="6874" t="14458" r="7834" b="14659">identifying</wd>

<space/>

<wd l="8150" t="14458" r="8467" b="14616">and</wd>

<space/>

<wd l="8774" t="14458" r="9662" b="14659">correcting</wd>

<space/>

<wd l="9979" t="14506" r="10483" b="14616">errors</wd>

<space/>

</ln>

<ln l="6134" t="14707" r="10483" b="14909" baseLine="14861" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="14707" r="7498" b="14909">simultaneously.</wd>

<space/>

<wd l="7646" t="14707" r="8563" b="14899">Therefore,</wd>

<space/>

<wd l="8707" t="14755" r="9168" b="14866">cases</wd>

<space/>

<wd l="9307" t="14707" r="9840" b="14866">where</wd>

<space/>

<wd l="9979" t="14755" r="10483" b="14866">errors</wd>

<space/>

</ln>

<ln l="6125" t="14962" r="10488" b="15120" baseLine="15115" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="15010" r="6552" b="15120">were</wd>

<space/>

<wd l="6610" t="14962" r="7454" b="15120">identified</wd>

<space/>

<wd l="7498" t="14962" r="7786" b="15120">but</wd>

<space/>

<wd l="7838" t="14981" r="8117" b="15120">not</wd>

<space/>

<wd l="8174" t="14962" r="8995" b="15120">corrected</wd>

<space/>

<wd l="9043" t="15010" r="9470" b="15120">were</wd>

<space/>

<wd l="9523" t="14962" r="10003" b="15120">taken</wd>

<space/>

<wd l="10056" t="14981" r="10224" b="15120">to</wd>

<space/>

<wd l="10277" t="14962" r="10488" b="15120">be</wd>

<space/>

</ln>

<ln l="6130" t="15216" r="9274" b="15418" baseLine="15365" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="15216" r="6778" b="15374">failures</wd>

<space/>

<wd l="6816" t="15216" r="7157" b="15418">just</wd>

<space/>

<wd l="7214" t="15216" r="7536" b="15374">like</wd>

<space/>

<wd l="7594" t="15216" r="8664" b="15374">unidentified</wd>

<space/>

<wd l="8717" t="15264" r="9274" b="15374">errors.</wd>

</ln>

</para>

</column>

</section>

<dd l="1408" t="15736" r="10530" b="15977">

<para l="5800" t="15792" r="6138" b="15946" alignment="centered" spaceBefore="4" lsp="exactly" lspExact="229" language="en">

<ln l="5866" t="15792" r="6072" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="39">

<wd l="5866" t="15792" r="6072" b="15946">43</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4305.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1403" marginTop="1417" marginRight="1384" marginBottom="1302" offsetX="-6" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1403" t="1417" r="10525" b="15335">

<column l="1403" t="1417" r="5814" b="15335">

<para l="1421" t="1464" r="5789" b="2635" alignment="justified" spaceBefore="1" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="1646" t="1464" r="5779" b="1666" baseLine="1613" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1646" t="1469" r="2501" b="1656">However,</wd>

<space/>

<wd l="2659" t="1464" r="2779" b="1622">it</wd>

<space/>

<wd l="2933" t="1464" r="3072" b="1622">is</wd>

<space/>

<wd l="3226" t="1464" r="3739" b="1622">worth</wd>

<space/>

<wd l="3888" t="1464" r="4886" b="1666">mentioning</wd>

<space/>

<wd l="5035" t="1464" r="5366" b="1622">that</wd>

<space/>

<wd l="5515" t="1464" r="5779" b="1622">the</wd>

<space/>

</ln>

<ln l="1421" t="1718" r="5789" b="1877" baseLine="1867" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="1718" r="2376" b="1877">normalizer</wd>

<space/>

<wd l="2424" t="1718" r="2923" b="1877">failed</wd>

<space/>

<wd l="2962" t="1738" r="3130" b="1877">to</wd>

<space/>

<wd l="3187" t="1738" r="3792" b="1877">correct</wd>

<space/>

<wd l="3845" t="1718" r="3941" b="1877">6</wd>

<space/>

<wd l="3994" t="1738" r="4334" b="1877">true</wd>

<space/>

<wd l="4387" t="1766" r="4891" b="1877">errors</wd>

<space/>

<wd l="4944" t="1718" r="5789" b="1877">identified</wd>

<space/>

</ln>

<ln l="1426" t="1968" r="5779" b="2170" baseLine="2122" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="1968" r="1589" b="2122">in</wd>

<space/>

<wd l="1757" t="1968" r="2026" b="2126">the</wd>

<space/>

<wd l="2194" t="1968" r="2933" b="2170">Buscapé</wd>

<space/>

<wd l="3110" t="1968" r="3725" b="2170">sample</wd>

<space/>

<wd l="3893" t="1968" r="4214" b="2126">and</wd>

<space/>

<wd l="4402" t="1968" r="4594" b="2122">14</wd>

<space/>

<wd l="4762" t="1987" r="5102" b="2126">true</wd>

<space/>

<wd l="5275" t="2016" r="5779" b="2126">errors</wd>

<space/>

</ln>

<ln l="1426" t="2222" r="5789" b="2424" baseLine="2376" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="2222" r="2266" b="2381">identified</wd>

<space/>

<wd l="2318" t="2222" r="2486" b="2376">in</wd>

<space/>

<wd l="2534" t="2222" r="2798" b="2381">the</wd>

<space/>

<wd l="2851" t="2222" r="3629" b="2381">Mercado</wd>

<space/>

<wd l="3686" t="2222" r="4157" b="2381">Livre</wd>

<space/>

<wd l="4224" t="2222" r="4882" b="2424">sample.</wd>

<space/>

<wd l="4944" t="2222" r="5280" b="2381">The</wd>

<space/>

<wd l="5338" t="2222" r="5789" b="2381">other</wd>

<space/>

</ln>

<ln l="1421" t="2477" r="5438" b="2635" baseLine="2626" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="2477" r="2645" b="2635">non-corrected</wd>

<space/>

<wd l="2702" t="2525" r="3206" b="2635">errors</wd>

<space/>

<wd l="3264" t="2525" r="3691" b="2635">were</wd>

<space/>

<wd l="3744" t="2496" r="4027" b="2635">not</wd>

<space/>

<wd l="4085" t="2525" r="4498" b="2635">even</wd>

<space/>

<wd l="4555" t="2477" r="5438" b="2635">identified.</wd>

</ln>

</para>

<para l="1421" t="2726" r="5803" b="3691" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="1651" t="2726" r="5803" b="2928" baseLine="2880" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="2726" r="1987" b="2885">The</wd>

<space/>

<wd l="2059" t="2726" r="3283" b="2885">normalization</wd>

<space/>

<wd l="3350" t="2726" r="3691" b="2885">tool</wd>

<space/>

<wd l="3768" t="2726" r="4589" b="2885">corrected</wd>

<space/>

<wd l="4666" t="2726" r="5050" b="2890">66%</wd>

<space/>

<wd l="5131" t="2726" r="5520" b="2928">(138</wd>

<space/>

<wd l="5606" t="2726" r="5803" b="2885">of</wd>

<space/>

</ln>

<ln l="1426" t="2981" r="5784" b="3182" baseLine="3134" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="2981" r="1819" b="3182">209)</wd>

<space/>

<wd l="1954" t="2981" r="2155" b="3139">of</wd>

<space/>

<wd l="2256" t="2981" r="2525" b="3139">the</wd>

<space/>

<wd l="2654" t="2981" r="3470" b="3182">manually</wd>

<space/>

<wd l="3600" t="2981" r="4459" b="3139">annotated</wd>

<space/>

<wd l="4584" t="3029" r="5088" b="3139">errors</wd>

<space/>

<wd l="5222" t="2981" r="5390" b="3134">in</wd>

<space/>

<wd l="5515" t="2981" r="5784" b="3139">the</wd>

<space/>

</ln>

<ln l="1421" t="3235" r="5784" b="3437" baseLine="3384" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="3235" r="2165" b="3437">Buscapé</wd>

<space/>

<wd l="2270" t="3235" r="2933" b="3437">sample,</wd>

<space/>

<wd l="3043" t="3235" r="3365" b="3394">and</wd>

<space/>

<wd l="3466" t="3235" r="3854" b="3398">63%</wd>

<space/>

<wd l="3960" t="3235" r="4358" b="3437">(206</wd>

<space/>

<wd l="4464" t="3235" r="4666" b="3394">of</wd>

<space/>

<wd l="4747" t="3235" r="5141" b="3437">325)</wd>

<space/>

<wd l="5251" t="3235" r="5419" b="3389">in</wd>

<space/>

<wd l="5515" t="3235" r="5784" b="3394">the</wd>

<space/>

</ln>

<ln l="1421" t="3490" r="3456" b="3691" baseLine="3638" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="3490" r="2198" b="3648">Mercado</wd>

<space/>

<wd l="2256" t="3490" r="2731" b="3648">Livre</wd>

<space/>

<wd l="2798" t="3490" r="3456" b="3691">sample.</wd>

</ln>

</para>

<para l="1421" t="3739" r="5803" b="5966" alignment="justified" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="1646" t="3739" r="5789" b="3941" baseLine="3893" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="3739" r="2765" b="3941">Misspellings</wd>

<space/>

<wd l="2928" t="3739" r="3485" b="3898">whose</wd>

<space/>

<wd l="3648" t="3739" r="4536" b="3898">correction</wd>

<space/>

<wd l="4694" t="3739" r="5410" b="3941">depends</wd>

<space/>

<wd l="5573" t="3787" r="5789" b="3898">on</wd>

<space/>

</ln>

<ln l="1426" t="3994" r="5779" b="4195" baseLine="4142" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="3994" r="2333" b="4152">contextual</wd>

<space/>

<wd l="2429" t="3994" r="3466" b="4152">information</wd>

<space/>

<wd l="3557" t="4042" r="3979" b="4152">were</wd>

<space/>

<wd l="4075" t="4013" r="4354" b="4152">not</wd>

<space/>

<wd l="4450" t="3994" r="5227" b="4195">expected</wd>

<space/>

<wd l="5314" t="4013" r="5482" b="4152">to</wd>

<space/>

<wd l="5573" t="3994" r="5779" b="4152">be</wd>

<space/>

</ln>

<ln l="1426" t="4248" r="5784" b="4450" baseLine="4397" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="4248" r="2285" b="4440">corrected,</wd>

<space/>

<wd l="2371" t="4296" r="2549" b="4406">as</wd>

<space/>

<wd l="2626" t="4248" r="2894" b="4406">the</wd>

<space/>

<wd l="2981" t="4248" r="3562" b="4450">speller</wd>

<space/>

<wd l="3638" t="4248" r="3778" b="4406">is</wd>

<space/>

<wd l="3850" t="4248" r="4363" b="4406">based</wd>

<space/>

<wd l="4440" t="4248" r="4829" b="4450">only</wd>

<space/>

<wd l="4910" t="4296" r="5126" b="4406">on</wd>

<space/>

<wd l="5203" t="4248" r="5784" b="4406">lexical</wd>

<space/>

</ln>

<ln l="1426" t="4498" r="5803" b="4699" baseLine="4651" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="4498" r="2501" b="4656">information.</wd>

<space/>

<wd l="2602" t="4502" r="3451" b="4690">However,</wd>

<space/>

<wd l="3547" t="4498" r="4114" b="4656">thanks</wd>

<space/>

<wd l="4205" t="4517" r="4373" b="4656">to</wd>

<space/>

<wd l="4464" t="4498" r="4728" b="4656">the</wd>

<space/>

<wd l="4829" t="4517" r="5515" b="4699">strategy</wd>

<space/>

<wd l="5602" t="4498" r="5803" b="4656">of</wd>

<space/>

</ln>

<ln l="1426" t="4752" r="5784" b="4954" baseLine="4906" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="4752" r="2290" b="4954">excluding</wd>

<space/>

<wd l="2467" t="4752" r="3029" b="4954">highly</wd>

<space/>

<wd l="3211" t="4752" r="4114" b="4954">infrequent</wd>

<space/>

<wd l="4291" t="4752" r="4824" b="4910">words</wd>

<space/>

<wd l="5006" t="4752" r="5338" b="4910">that</wd>

<space/>

<wd l="5520" t="4800" r="5784" b="4910">are</wd>

<space/>

</ln>

<ln l="1421" t="5006" r="5774" b="5208" baseLine="5155" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="5006" r="2501" b="5208">homographs</wd>

<space/>

<wd l="2568" t="5006" r="2770" b="5165">of</wd>

<space/>

<wd l="2808" t="5006" r="3538" b="5208">frequent</wd>

<space/>

<wd l="3595" t="5006" r="4133" b="5165">words</wd>

<space/>

<wd l="4195" t="5006" r="4867" b="5165">without</wd>

<space/>

<wd l="4930" t="5006" r="5774" b="5198">diacritics,</wd>

<space/>

</ln>

<ln l="1430" t="5261" r="5784" b="5462" baseLine="5410" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="5309" r="1882" b="5419">some</wd>

<space/>

<wd l="2006" t="5261" r="2400" b="5419">such</wd>

<space/>

<wd l="2520" t="5309" r="3029" b="5419">errors</wd>

<space/>

<wd l="3149" t="5309" r="3571" b="5419">were</wd>

<space/>

<wd l="3696" t="5261" r="4512" b="5419">corrected</wd>

<space/>

<wd l="4627" t="5261" r="5098" b="5462">(38%</wd>

<space/>

<wd l="5222" t="5261" r="5424" b="5419">of</wd>

<space/>

<wd l="5515" t="5261" r="5784" b="5419">the</wd>

<space/>

</ln>

<ln l="1426" t="5510" r="5789" b="5712" baseLine="5664" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="5510" r="2280" b="5669">annotated</wd>

<space/>

<wd l="2338" t="5558" r="2842" b="5669">errors</wd>

<space/>

<wd l="2914" t="5510" r="3110" b="5669">of</wd>

<space/>

<wd l="3158" t="5510" r="3552" b="5669">such</wd>

<space/>

<wd l="3619" t="5530" r="4368" b="5712">category</wd>

<space/>

<wd l="4435" t="5510" r="4603" b="5664">in</wd>

<space/>

<wd l="4661" t="5510" r="5405" b="5712">Buscapé</wd>

<space/>

<wd l="5472" t="5510" r="5789" b="5669">and</wd>

<space/>

</ln>

<ln l="1426" t="5765" r="3533" b="5966" baseLine="5914" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="5765" r="1819" b="5928">31%</wd>

<space/>

<wd l="1882" t="5765" r="2050" b="5918">in</wd>

<space/>

<wd l="2102" t="5765" r="2880" b="5923">Mercado</wd>

<space/>

<wd l="2942" t="5765" r="3533" b="5966">Livre).</wd>

</ln>

</para>

<para l="1416" t="6019" r="5789" b="9000" alignment="justified" spaceBefore="2" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="1651" t="6019" r="5784" b="6178" baseLine="6168" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1651" t="6019" r="1987" b="6178">The</wd>

<space/>

<wd l="2069" t="6067" r="2443" b="6178">case</wd>

<space/>

<wd l="2520" t="6067" r="2813" b="6178">use</wd>

<space/>

<wd l="2894" t="6019" r="3062" b="6173">in</wd>

<space/>

<wd l="3139" t="6019" r="3408" b="6178">the</wd>

<space/>

<wd l="3494" t="6038" r="3864" b="6178">start</wd>

<space/>

<wd l="3946" t="6019" r="4147" b="6178">of</wd>

<space/>

<wd l="4210" t="6038" r="5045" b="6178">sentences</wd>

<space/>

<wd l="5131" t="6019" r="5448" b="6178">and</wd>

<space/>

<wd l="5515" t="6019" r="5784" b="6178">the</wd>

<space/>

</ln>

<ln l="1416" t="6269" r="5784" b="6470" baseLine="6422" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1416" t="6269" r="2458" b="6470">punctuation</wd>

<space/>

<wd l="2707" t="6317" r="2966" b="6427">are</wd>

<space/>

<wd l="3211" t="6269" r="3816" b="6427">treated</wd>

<space/>

<wd l="4046" t="6269" r="4272" b="6470">by</wd>

<space/>

<wd l="4512" t="6269" r="4781" b="6427">the</wd>

<space/>

<wd l="5035" t="6288" r="5784" b="6427">sentence</wd>

<space/>

</ln>

<ln l="1430" t="6523" r="5784" b="6725" baseLine="6677" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1430" t="6523" r="2592" b="6725">segmentation</wd>

<space/>

<wd l="2851" t="6523" r="3235" b="6682">tool.</wd>

<space/>

<wd l="3509" t="6523" r="4032" b="6682">These</wd>

<space/>

<wd l="4291" t="6523" r="5256" b="6725">procedures</wd>

<space/>

<wd l="5525" t="6571" r="5784" b="6682">are</wd>

<space/>

</ln>

<ln l="1430" t="6778" r="5784" b="6979" baseLine="6926" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1430" t="6778" r="2621" b="6936">simultaneous:</wd>

<space/>

<wd l="2813" t="6778" r="2962" b="6931">if</wd>

<space/>

<wd l="3110" t="6826" r="3206" b="6936">a</wd>

<space/>

<wd l="3365" t="6778" r="4406" b="6979">punctuation</wd>

<space/>

<wd l="4574" t="6778" r="5030" b="6936">mark</wd>

<space/>

<wd l="5194" t="6778" r="5333" b="6936">is</wd>

<space/>

<wd l="5501" t="6797" r="5784" b="6936">not</wd>

<space/>

</ln>

<ln l="1426" t="7027" r="5779" b="7229" baseLine="7181" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="7027" r="2165" b="7219">inserted,</wd>

<space/>

<wd l="2338" t="7027" r="2606" b="7186">the</wd>

<space/>

<wd l="2779" t="7027" r="3283" b="7186">initial</wd>

<space/>

<wd l="3456" t="7027" r="3912" b="7186">word</wd>

<space/>

<wd l="4080" t="7027" r="4478" b="7186">after</wd>

<space/>

<wd l="4651" t="7075" r="4747" b="7186">a</wd>

<space/>

<wd l="4906" t="7027" r="5477" b="7229">period</wd>

<space/>

<wd l="5645" t="7027" r="5779" b="7186">is</wd>

<space/>

</ln>

<ln l="1426" t="7282" r="5784" b="7483" baseLine="7435" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="7282" r="2582" b="7483">consequently</wd>

<space/>

<wd l="2635" t="7301" r="2918" b="7440">not</wd>

<space/>

<wd l="2976" t="7282" r="3845" b="7440">converted</wd>

<space/>

<wd l="3898" t="7282" r="4229" b="7440">into</wd>

<space/>

<wd l="4291" t="7330" r="5213" b="7483">uppercase.</wd>

<space/>

<wd l="5285" t="7286" r="5458" b="7435">In</wd>

<space/>

<wd l="5515" t="7282" r="5784" b="7440">the</wd>

<space/>

</ln>

<ln l="1421" t="7536" r="5789" b="7738" baseLine="7685" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="7536" r="2198" b="7694">Mercado</wd>

<space/>

<wd l="2275" t="7536" r="2750" b="7694">Livre</wd>

<space/>

<wd l="2832" t="7584" r="3461" b="7738">corpus,</wd>

<space/>

<wd l="3542" t="7536" r="3811" b="7694">the</wd>

<space/>

<wd l="3883" t="7584" r="4176" b="7694">use</wd>

<space/>

<wd l="4258" t="7536" r="4459" b="7694">of</wd>

<space/>

<wd l="4512" t="7584" r="5390" b="7738">uppercase</wd>

<space/>

<wd l="5472" t="7536" r="5789" b="7694">and</wd>

<space/>

</ln>

<ln l="1426" t="7790" r="5784" b="7949" baseLine="7939" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="7790" r="2294" b="7949">lowercase</wd>

<space/>

<wd l="2371" t="7790" r="2506" b="7949">is</wd>

<space/>

<wd l="2582" t="7790" r="2827" b="7949">far</wd>

<space/>

<wd l="2890" t="7838" r="3341" b="7949">more</wd>

<space/>

<wd l="3408" t="7790" r="4762" b="7949">unconventional</wd>

<space/>

<wd l="4834" t="7790" r="5208" b="7949">than</wd>

<space/>

<wd l="5285" t="7790" r="5453" b="7944">in</wd>

<space/>

<wd l="5515" t="7790" r="5784" b="7949">the</wd>

<space/>

</ln>

<ln l="1421" t="8040" r="5784" b="8242" baseLine="8194" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="8040" r="2165" b="8242">Buscapé</wd>

<space/>

<wd l="2213" t="8088" r="2794" b="8242">corpus</wd>

<space/>

<wd l="2846" t="8040" r="3163" b="8198">and</wd>

<space/>

<wd l="3202" t="8040" r="3518" b="8198">this</wd>

<space/>

<wd l="3571" t="8040" r="4296" b="8242">explains</wd>

<space/>

<wd l="4344" t="8040" r="4613" b="8198">the</wd>

<space/>

<wd l="4666" t="8040" r="5784" b="8198">deterioration</wd>

<space/>

</ln>

<ln l="1426" t="8294" r="5784" b="8496" baseLine="8443" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="8294" r="1627" b="8453">of</wd>

<space/>

<wd l="1752" t="8294" r="2323" b="8453">results</wd>

<space/>

<wd l="2482" t="8294" r="2650" b="8448">in</wd>

<space/>

<wd l="2803" t="8342" r="3173" b="8453">case</wd>

<space/>

<wd l="3326" t="8342" r="3614" b="8453">use</wd>

<space/>

<wd l="3773" t="8294" r="4090" b="8453">and</wd>

<space/>

<wd l="4229" t="8294" r="5314" b="8496">punctuation.</wd>

<space/>

<wd l="5477" t="8299" r="5784" b="8453">For</wd>

<space/>

</ln>

<ln l="1426" t="8549" r="5784" b="8750" baseLine="8698" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="8549" r="2213" b="8750">example,</wd>

<space/>

<wd l="2266" t="8549" r="2434" b="8702">in</wd>

<space/>

<wd l="2477" t="8549" r="3254" b="8707">Mercado</wd>

<space/>

<wd l="3302" t="8549" r="3826" b="8741">Livre,</wd>

<space/>

<wd l="3874" t="8549" r="4426" b="8707">unlike</wd>

<space/>

<wd l="4474" t="8549" r="4642" b="8702">in</wd>

<space/>

<wd l="4685" t="8549" r="5477" b="8750">Buscapé,</wd>

<space/>

<wd l="5530" t="8597" r="5784" b="8707">we</wd>

<space/>

</ln>

<ln l="1426" t="8798" r="5587" b="9000" baseLine="8952" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="8798" r="1939" b="8957">found</wd>

<space/>

<wd l="1987" t="8798" r="2669" b="8957">reviews</wd>

<space/>

<wd l="2731" t="8798" r="3706" b="9000">completely</wd>

<space/>

<wd l="3758" t="8798" r="4382" b="8957">written</wd>

<space/>

<wd l="4440" t="8798" r="4608" b="8952">in</wd>

<space/>

<wd l="4666" t="8846" r="5587" b="9000">uppercase.</wd>

</ln>

</para>

<para l="1421" t="9053" r="5789" b="11026" alignment="justified" spaceBefore="2" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="1651" t="9053" r="5774" b="9254" baseLine="9206" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="9053" r="1987" b="9211">The</wd>

<space/>

<wd l="2054" t="9053" r="3019" b="9211">conversion</wd>

<space/>

<wd l="3086" t="9053" r="3283" b="9211">of</wd>

<space/>

<wd l="3322" t="9101" r="3902" b="9254">proper</wd>

<space/>

<wd l="3960" t="9101" r="4483" b="9211">nouns</wd>

<space/>

<wd l="4555" t="9053" r="4872" b="9211">and</wd>

<space/>

<wd l="4934" t="9101" r="5774" b="9254">acronyms</wd>

<space/>

</ln>

<ln l="1421" t="9307" r="5789" b="9509" baseLine="9456" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="9326" r="1584" b="9466">to</wd>

<space/>

<wd l="1642" t="9355" r="2568" b="9509">uppercase,</wd>

<space/>

<wd l="2630" t="9355" r="2808" b="9466">as</wd>

<space/>

<wd l="2866" t="9307" r="3235" b="9466">well</wd>

<space/>

<wd l="3298" t="9355" r="3470" b="9466">as</wd>

<space/>

<wd l="3528" t="9307" r="3797" b="9466">the</wd>

<space/>

<wd l="3850" t="9307" r="4814" b="9466">conversion</wd>

<space/>

<wd l="4872" t="9307" r="5069" b="9466">of</wd>

<space/>

<wd l="5107" t="9312" r="5789" b="9466">Internet</wd>

<space/>

</ln>

<ln l="1430" t="9557" r="5779" b="9758" baseLine="9710" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="9557" r="1963" b="9758">slangs</wd>

<space/>

<wd l="2064" t="9576" r="2232" b="9715">to</wd>

<space/>

<wd l="2328" t="9557" r="2597" b="9715">the</wd>

<space/>

<wd l="2698" t="9557" r="3442" b="9715">standard</wd>

<space/>

<wd l="3533" t="9557" r="4368" b="9758">language,</wd>

<space/>

<wd l="4474" t="9605" r="4738" b="9715">are</wd>

<space/>

<wd l="4834" t="9576" r="5160" b="9715">two</wd>

<space/>

<wd l="5261" t="9557" r="5779" b="9715">issues</wd>

<space/>

</ln>

<ln l="1421" t="9811" r="5779" b="10013" baseLine="9965" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="9811" r="1747" b="9970">that</wd>

<space/>

<wd l="1829" t="9811" r="2467" b="10013">depend</wd>

<space/>

<wd l="2539" t="9859" r="2755" b="9970">on</wd>

<space/>

<wd l="2832" t="9811" r="3096" b="9970">the</wd>

<space/>

<wd l="3173" t="9811" r="4061" b="10013">respective</wd>

<space/>

<wd l="4147" t="9811" r="4920" b="9970">lexicons.</wd>

<space/>

<wd l="5011" t="9811" r="5251" b="9970">As</wd>

<space/>

<wd l="5338" t="9811" r="5779" b="10003">such,</wd>

<space/>

</ln>

<ln l="1426" t="10066" r="5784" b="10267" baseLine="10214" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="10066" r="2150" b="10224">lexicons</wd>

<space/>

<wd l="2318" t="10066" r="3091" b="10267">resulting</wd>

<space/>

<wd l="3259" t="10066" r="3686" b="10224">from</wd>

<space/>

<wd l="3850" t="10066" r="4118" b="10224">the</wd>

<space/>

<wd l="4296" t="10066" r="4997" b="10267">analysis</wd>

<space/>

<wd l="5170" t="10066" r="5371" b="10224">of</wd>

<space/>

<wd l="5515" t="10066" r="5784" b="10224">the</wd>

<space/>

</ln>

<ln l="1421" t="10320" r="5779" b="10522" baseLine="10469" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="10320" r="2165" b="10522">Buscapé</wd>

<space/>

<wd l="2251" t="10368" r="2827" b="10522">corpus</wd>

<space/>

<wd l="2918" t="10368" r="3182" b="10478">are</wd>

<space/>

<wd l="3269" t="10339" r="3547" b="10478">not</wd>

<space/>

<wd l="3638" t="10320" r="4464" b="10478">sufficient</wd>

<space/>

<wd l="4546" t="10339" r="4709" b="10478">to</wd>

<space/>

<wd l="4800" t="10320" r="5482" b="10522">identify</wd>

<space/>

<wd l="5568" t="10320" r="5779" b="10478">all</wd>

<space/>

</ln>

<ln l="1421" t="10570" r="5779" b="10771" baseLine="10723" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="10570" r="1685" b="10728">the</wd>

<space/>

<wd l="1771" t="10618" r="2352" b="10771">proper</wd>

<space/>

<wd l="2438" t="10618" r="3014" b="10762">nouns,</wd>

<space/>

<wd l="3115" t="10618" r="3955" b="10771">acronyms</wd>

<space/>

<wd l="4056" t="10570" r="4373" b="10728">and</wd>

<space/>

<wd l="4469" t="10574" r="5146" b="10728">Internet</wd>

<space/>

<wd l="5242" t="10570" r="5779" b="10771">slangs</wd>

<space/>

</ln>

<ln l="1426" t="10824" r="4224" b="11026" baseLine="10978" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="10824" r="1848" b="10982">from</wd>

<space/>

<wd l="1901" t="10824" r="2170" b="10982">the</wd>

<space/>

<wd l="2227" t="10824" r="3005" b="10982">Mercado</wd>

<space/>

<wd l="3062" t="10824" r="3538" b="10982">Livre</wd>

<space/>

<wd l="3600" t="10872" r="4224" b="11026">corpus.</wd>

</ln>

</para>

<para l="1421" t="11078" r="5784" b="12797" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="1646" t="11078" r="5779" b="11280" baseLine="11227" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="11078" r="2318" b="11280">Finally,</wd>

<space/>

<wd l="2386" t="11078" r="2654" b="11237">the</wd>

<space/>

<wd l="2717" t="11078" r="3206" b="11280">glued</wd>

<space/>

<wd l="3264" t="11078" r="3797" b="11237">words</wd>

<space/>

<wd l="3864" t="11126" r="4123" b="11237">are</wd>

<space/>

<wd l="4186" t="11078" r="5179" b="11237">normalized</wd>

<space/>

<wd l="5232" t="11078" r="5458" b="11280">by</wd>

<space/>

<wd l="5515" t="11078" r="5779" b="11237">the</wd>

<space/>

</ln>

<ln l="1421" t="11328" r="5784" b="11530" baseLine="11482" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="11328" r="2242" b="11486">tokenizer</wd>

<space/>

<wd l="2400" t="11328" r="2789" b="11530">only</wd>

<space/>

<wd l="2952" t="11328" r="3120" b="11482">in</wd>

<space/>

<wd l="3283" t="11376" r="3739" b="11486">cases</wd>

<space/>

<wd l="3902" t="11328" r="4435" b="11486">where</wd>

<space/>

<wd l="4603" t="11328" r="5352" b="11486">numbers</wd>

<space/>

<wd l="5525" t="11376" r="5784" b="11486">are</wd>

<space/>

</ln>

<ln l="1426" t="11582" r="5779" b="11784" baseLine="11736" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="11582" r="2208" b="11741">followed</wd>

<space/>

<wd l="2270" t="11582" r="2496" b="11784">by</wd>

<space/>

<wd l="2568" t="11582" r="2995" b="11741">units</wd>

<space/>

<wd l="3077" t="11582" r="3278" b="11741">of</wd>

<space/>

<wd l="3331" t="11602" r="4546" b="11741">measurement.</wd>

<space/>

<wd l="4642" t="11582" r="5179" b="11741">Glued</wd>

<space/>

<wd l="5246" t="11582" r="5779" b="11741">words</wd>

<space/>

</ln>

<ln l="1426" t="11837" r="5779" b="12038" baseLine="11986" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="11885" r="1685" b="11995">are</wd>

<space/>

<wd l="1742" t="11885" r="2083" b="11995">rare</wd>

<space/>

<wd l="2146" t="11837" r="2314" b="11990">in</wd>

<space/>

<wd l="2362" t="11837" r="2760" b="11995">both</wd>

<space/>

<wd l="2818" t="11837" r="3662" b="11995">evaluated</wd>

<space/>

<wd l="3715" t="11885" r="4430" b="12038">corpora,</wd>

<space/>

<wd l="4488" t="11837" r="4776" b="11995">but</wd>

<space/>

<wd l="4829" t="11885" r="5083" b="11995">we</wd>

<space/>

<wd l="5141" t="11837" r="5563" b="11995">need</wd>

<space/>

<wd l="5611" t="11856" r="5779" b="11995">to</wd>

<space/>

</ln>

<ln l="1421" t="12091" r="5779" b="12250" baseLine="12240" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="12091" r="1944" b="12250">tackle</wd>

<space/>

<wd l="2030" t="12091" r="2472" b="12250">them</wd>

<space/>

<wd l="2558" t="12091" r="2726" b="12245">in</wd>

<space/>

<wd l="2813" t="12091" r="3082" b="12250">the</wd>

<space/>

<wd l="3178" t="12091" r="3696" b="12250">future</wd>

<space/>

<wd l="3787" t="12091" r="3941" b="12245">if</wd>

<space/>

<wd l="4003" t="12139" r="4258" b="12250">we</wd>

<space/>

<wd l="4349" t="12110" r="4776" b="12250">want</wd>

<space/>

<wd l="4862" t="12110" r="5030" b="12250">to</wd>

<space/>

<wd l="5126" t="12091" r="5779" b="12250">address</wd>

<space/>

</ln>

<ln l="1426" t="12341" r="5784" b="12542" baseLine="12494" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="12341" r="1872" b="12499">other</wd>

<space/>

<wd l="1939" t="12341" r="2822" b="12542">categories</wd>

<space/>

<wd l="2899" t="12341" r="3101" b="12499">of</wd>

<space/>

<wd l="3144" t="12341" r="3653" b="12533">UGC,</wd>

<space/>

<wd l="3734" t="12341" r="4128" b="12499">such</wd>

<space/>

<wd l="4200" t="12389" r="4378" b="12499">as</wd>

<space/>

<wd l="4450" t="12341" r="4891" b="12499">chats</wd>

<space/>

<wd l="4963" t="12341" r="5285" b="12499">and</wd>

<space/>

<wd l="5352" t="12341" r="5784" b="12499">short</wd>

<space/>

</ln>

<ln l="1421" t="12605" r="2290" b="12797" baseLine="12744" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="12643" r="2290" b="12797">messages.</wd>

</ln>

</para>

<para l="1416" t="12850" r="5784" b="14822" alignment="justified" spaceBefore="2" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="1646" t="12850" r="5784" b="13008" baseLine="12998" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1646" t="12850" r="2774" b="13008">UGCNormal</wd>

<space/>

<wd l="2938" t="12850" r="3413" b="13008">made</wd>

<space/>

<wd l="3595" t="12850" r="3893" b="13008">149</wd>

<space/>

<wd l="4061" t="12850" r="5030" b="13008">corrections</wd>

<space/>

<wd l="5194" t="12850" r="5362" b="13003">in</wd>

<space/>

<wd l="5515" t="12850" r="5784" b="13008">the</wd>

<space/>

</ln>

<ln l="1421" t="13099" r="5779" b="13301" baseLine="13253" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="13099" r="2165" b="13301">Buscapé</wd>

<space/>

<wd l="2222" t="13099" r="2885" b="13301">sample,</wd>

<space/>

<wd l="2942" t="13099" r="3144" b="13258">of</wd>

<space/>

<wd l="3173" t="13099" r="3710" b="13258">which</wd>

<space/>

<wd l="3782" t="13099" r="4075" b="13258">138</wd>

<space/>

<wd l="4133" t="13147" r="4560" b="13258">were</wd>

<space/>

<wd l="4613" t="13118" r="4954" b="13258">true</wd>

<space/>

<wd l="5002" t="13099" r="5779" b="13301">positives</wd>

<space/>

</ln>

<ln l="1426" t="13354" r="5779" b="13555" baseLine="13507" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="13354" r="1742" b="13512">and</wd>

<space/>

<wd l="1848" t="13354" r="2021" b="13507">11</wd>

<space/>

<wd l="2126" t="13402" r="2549" b="13512">were</wd>

<space/>

<wd l="2640" t="13354" r="3048" b="13512">false</wd>

<space/>

<wd l="3130" t="13354" r="3912" b="13555">positives</wd>

<space/>

<wd l="4008" t="13354" r="5165" b="13555">(well-formed</wd>

<space/>

<wd l="5246" t="13354" r="5779" b="13512">words</wd>

<space/>

</ln>

<ln l="1421" t="13608" r="5784" b="13810" baseLine="13757" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="13608" r="1747" b="13766">that</wd>

<space/>

<wd l="1858" t="13656" r="2285" b="13766">were</wd>

<space/>

<wd l="2400" t="13608" r="3350" b="13810">incorrectly</wd>

<space/>

<wd l="3456" t="13608" r="4373" b="13810">modified),</wd>

<space/>

<wd l="4493" t="13608" r="5578" b="13810">representing</wd>

<space/>

<wd l="5688" t="13656" r="5784" b="13766">a</wd>

<space/>

</ln>

<ln l="1416" t="13858" r="5779" b="14059" baseLine="14011" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1416" t="13858" r="2227" b="14059">precision</wd>

<space/>

<wd l="2304" t="13858" r="2506" b="14016">of</wd>

<space/>

<wd l="2563" t="13858" r="3005" b="14021">93%.</wd>

<space/>

<wd l="3101" t="13862" r="3274" b="14011">In</wd>

<space/>

<wd l="3350" t="13858" r="3619" b="14016">the</wd>

<space/>

<wd l="3696" t="13858" r="4474" b="14016">Mercado</wd>

<space/>

<wd l="4555" t="13858" r="5030" b="14016">Livre</wd>

<space/>

<wd l="5117" t="13858" r="5779" b="14059">sample,</wd>

<space/>

</ln>

<ln l="1421" t="14112" r="5779" b="14304" baseLine="14266" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="14112" r="2549" b="14270">UGCNormal</wd>

<space/>

<wd l="2611" t="14112" r="3082" b="14270">made</wd>

<space/>

<wd l="3149" t="14112" r="3470" b="14270">220</wd>

<space/>

<wd l="3538" t="14112" r="4555" b="14304">corrections,</wd>

<space/>

<wd l="4627" t="14112" r="4829" b="14270">of</wd>

<space/>

<wd l="4862" t="14112" r="5400" b="14270">which</wd>

<space/>

<wd l="5458" t="14112" r="5779" b="14270">206</wd>

<space/>

</ln>

<ln l="1421" t="14366" r="5774" b="14568" baseLine="14515" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="14414" r="1843" b="14525">were</wd>

<space/>

<wd l="1934" t="14386" r="2270" b="14525">true</wd>

<space/>

<wd l="2352" t="14366" r="3134" b="14568">positives</wd>

<space/>

<wd l="3230" t="14366" r="3547" b="14525">and</wd>

<space/>

<wd l="3653" t="14366" r="3845" b="14520">14</wd>

<space/>

<wd l="3931" t="14414" r="4358" b="14525">were</wd>

<space/>

<wd l="4454" t="14366" r="4858" b="14525">false</wd>

<space/>

<wd l="4939" t="14366" r="5774" b="14568">positives,</wd>

<space/>

</ln>

<ln l="1426" t="14621" r="4670" b="14822" baseLine="14770" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="14621" r="1771" b="14779">also</wd>

<space/>

<wd l="1829" t="14621" r="2918" b="14822">representing</wd>

<space/>

<wd l="2976" t="14669" r="3072" b="14779">a</wd>

<space/>

<wd l="3120" t="14621" r="3926" b="14822">precision</wd>

<space/>

<wd l="3989" t="14621" r="4190" b="14779">of</wd>

<space/>

<wd l="4229" t="14621" r="4670" b="14784">93%.</wd>

</ln>

</para>

<para l="1430" t="14870" r="5784" b="15326" alignment="justified" fli="216" lsp="exactly" lspExact="247" language="en">

<ln l="1646" t="14870" r="5784" b="15072" baseLine="15024" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1646" t="14875" r="2126" b="15029">From</wd>

<space/>

<wd l="2261" t="14870" r="2530" b="15029">the</wd>

<space/>

<wd l="2678" t="14870" r="2885" b="15029">82</wd>

<space/>

<wd l="3034" t="14870" r="3504" b="15029">OOV</wd>

<space/>

<wd l="3648" t="14870" r="4176" b="15029">words</wd>

<space/>

<wd l="4325" t="14870" r="4493" b="15024">in</wd>

<space/>

<wd l="4627" t="14870" r="4896" b="15029">the</wd>

<space/>

<wd l="5045" t="14870" r="5784" b="15072">Buscapé</wd>

<space/>

</ln>

<ln l="1430" t="15125" r="5784" b="15326" baseLine="15274" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="15125" r="2088" b="15326">sample,</wd>

<space/>

<wd l="2150" t="15125" r="3278" b="15283">UGCNormal</wd>

<space/>

<wd l="3341" t="15125" r="4166" b="15283">corrected</wd>

<space/>

<wd l="4224" t="15125" r="4421" b="15283">65</wd>

<space/>

<wd l="4488" t="15125" r="5083" b="15326">(79%),</wd>

<space/>

<wd l="5150" t="15125" r="5467" b="15283">and</wd>

<space/>

<wd l="5515" t="15125" r="5784" b="15283">the</wd>

<space/>

</ln>

</para>

</column>

<column l="6114" t="1417" r="10525" b="14971">

<para l="6125" t="1464" r="10483" b="1920" alignment="justified" spaceBefore="1" lsp="exactly" lspExact="253" language="en">

<ln l="6125" t="1464" r="10483" b="1666" baseLine="1613" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="1464" r="7018" b="1666">remaining</wd>

<space/>

<wd l="7123" t="1464" r="7310" b="1622">17</wd>

<space/>

<wd l="7402" t="1464" r="7934" b="1622">words</wd>

<space/>

<wd l="8026" t="1512" r="8290" b="1622">are</wd>

<space/>

<wd l="8381" t="1464" r="9346" b="1622">constituted</wd>

<space/>

<wd l="9427" t="1464" r="9629" b="1622">of</wd>

<space/>

<wd l="9701" t="1464" r="9797" b="1622">6</wd>

<space/>

<wd l="9883" t="1464" r="10483" b="1666">(7.3%)</wd>

<space/>

</ln>

<ln l="6125" t="1718" r="9470" b="1920" baseLine="1867" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="1738" r="6466" b="1877">true</wd>

<space/>

<wd l="6528" t="1766" r="7032" b="1877">errors</wd>

<space/>

<wd l="7094" t="1718" r="7411" b="1877">and</wd>

<space/>

<wd l="7483" t="1718" r="7656" b="1872">11</wd>

<space/>

<wd l="7742" t="1718" r="8448" b="1920">(13.4%)</wd>

<space/>

<wd l="8506" t="1718" r="8832" b="1877">real</wd>

<space/>

<wd l="8894" t="1718" r="9470" b="1877">words.</wd>

</ln>

</para>

<para l="6130" t="1968" r="10488" b="3182" alignment="justified" spaceBefore="2" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="6355" t="1968" r="10483" b="2170" baseLine="2122" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="1973" r="6533" b="2122">In</wd>

<space/>

<wd l="6653" t="1968" r="6922" b="2126">the</wd>

<space/>

<wd l="7046" t="1968" r="7829" b="2126">Mercado</wd>

<space/>

<wd l="7954" t="1968" r="8429" b="2126">Livre</wd>

<space/>

<wd l="8563" t="1968" r="9221" b="2170">sample,</wd>

<space/>

<wd l="9350" t="1968" r="10483" b="2126">UGCNormal</wd>

<space/>

</ln>

<ln l="6130" t="2222" r="10488" b="2424" baseLine="2376" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="2222" r="6970" b="2381">identified</wd>

<space/>

<wd l="7128" t="2222" r="7421" b="2381">145</wd>

<space/>

<wd l="7574" t="2222" r="8045" b="2381">OOV</wd>

<space/>

<wd l="8184" t="2222" r="8717" b="2381">words</wd>

<space/>

<wd l="8866" t="2222" r="9182" b="2381">and</wd>

<space/>

<wd l="9322" t="2222" r="10488" b="2424">appropriately</wd>

<space/>

</ln>

<ln l="6130" t="2477" r="10478" b="2678" baseLine="2626" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="2477" r="6946" b="2635">corrected</wd>

<space/>

<wd l="7066" t="2477" r="7358" b="2635">117</wd>

<space/>

<wd l="7474" t="2477" r="8227" b="2678">(80.6%).</wd>

<space/>

<wd l="8338" t="2482" r="8818" b="2635">From</wd>

<space/>

<wd l="8909" t="2477" r="9178" b="2635">the</wd>

<space/>

<wd l="9278" t="2477" r="10171" b="2678">remaining</wd>

<space/>

<wd l="10272" t="2477" r="10478" b="2635">28</wd>

<space/>

</ln>

<ln l="6130" t="2726" r="10483" b="2928" baseLine="2880" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="2726" r="6600" b="2885">OOV</wd>

<space/>

<wd l="6701" t="2726" r="7286" b="2918">words,</wd>

<space/>

<wd l="7421" t="2726" r="7613" b="2880">14</wd>

<space/>

<wd l="7723" t="2726" r="8318" b="2928">(9.6%)</wd>

<space/>

<wd l="8429" t="2774" r="8693" b="2885">are</wd>

<space/>

<wd l="8794" t="2746" r="9134" b="2885">true</wd>

<space/>

<wd l="9245" t="2774" r="9744" b="2885">errors</wd>

<space/>

<wd l="9854" t="2726" r="10171" b="2885">and</wd>

<space/>

<wd l="10291" t="2726" r="10483" b="2880">14</wd>

<space/>

</ln>

<ln l="6130" t="2981" r="8074" b="3182" baseLine="3134" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="2981" r="6725" b="3182">(9.6%)</wd>

<space/>

<wd l="6792" t="3029" r="7051" b="3139">are</wd>

<space/>

<wd l="7109" t="2981" r="7430" b="3139">real</wd>

<space/>

<wd l="7493" t="2981" r="8074" b="3139">words.</wd>

</ln>

</para>

<para l="6125" t="3235" r="10493" b="4152" alignment="justified" spaceBefore="1" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="6355" t="3235" r="10483" b="3437" baseLine="3384" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="3235" r="6696" b="3394">The</wd>

<space/>

<wd l="6806" t="3235" r="7210" b="3394">false</wd>

<space/>

<wd l="7315" t="3235" r="8098" b="3437">positives</wd>

<space/>

<wd l="8213" t="3235" r="8606" b="3437">(real</wd>

<space/>

<wd l="8712" t="3235" r="9250" b="3394">words</wd>

<space/>

<wd l="9360" t="3235" r="10205" b="3394">identified</wd>

<space/>

<wd l="10310" t="3283" r="10483" b="3394">as</wd>

<space/>

</ln>

<ln l="6130" t="3490" r="10488" b="3691" baseLine="3638" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="3490" r="6701" b="3691">errors)</wd>

<space/>

<wd l="6835" t="3538" r="7094" b="3648">are</wd>

<space/>

<wd l="7219" t="3490" r="7829" b="3691">mainly</wd>

<space/>

<wd l="7954" t="3490" r="8578" b="3691">foreign</wd>

<space/>

<wd l="8702" t="3490" r="9082" b="3648">loan</wd>

<space/>

<wd l="9197" t="3490" r="9782" b="3682">words,</wd>

<space/>

<wd l="9907" t="3538" r="10488" b="3691">proper</wd>

<space/>

</ln>

<ln l="6125" t="3739" r="10493" b="3941" baseLine="3893" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="3787" r="6701" b="3931">nouns,</wd>

<space/>

<wd l="6792" t="3787" r="7632" b="3941">acronyms</wd>

<space/>

<wd l="7723" t="3739" r="8040" b="3898">and</wd>

<space/>

<wd l="8122" t="3744" r="8798" b="3898">Internet</wd>

<space/>

<wd l="8890" t="3739" r="9341" b="3941">slang</wd>

<space/>

<wd l="9427" t="3739" r="9984" b="3898">absent</wd>

<space/>

<wd l="10066" t="3739" r="10493" b="3898">from</wd>

<space/>

</ln>

<ln l="6125" t="3994" r="8573" b="4152" baseLine="4142">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6125" t="3994" r="6394" b="4152">the</wd>

<space/>

</run>

<wd l="6451" t="3994" r="7742" b="4152"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">UGCNormal</run>

<run underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">’</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">s</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="7805" t="3994" r="8573" b="4152">lexicons.</wd>

</run>

</ln>

</para>

<para l="6130" t="4411" r="8419" b="4570" alignment="left" spaceBefore="169" lsp="exactly" lspExact="249" language="en">

<ln l="6130" t="4411" r="8419" b="4570" baseLine="4565" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="4411" r="6446" b="4570">5.2.</wd>

<space/>

<wd l="6485" t="4411" r="7334" b="4570">Extrinsic</wd>

<space/>

<wd l="7392" t="4411" r="8419" b="4570">Evaluation</wd>

</ln>

</para>

<para l="6125" t="4781" r="10498" b="5486" alignment="justified" spaceBefore="114" lsp="exactly" lspExact="252" language="en">

<ln l="6130" t="4781" r="10498" b="4973" baseLine="4930" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="4786" r="6365" b="4939">To</wd>

<space/>

<wd l="6442" t="4781" r="7133" b="4939">validate</wd>

<space/>

<wd l="7210" t="4781" r="7478" b="4939">the</wd>

<space/>

<wd l="7550" t="4781" r="8774" b="4939">normalization</wd>

<space/>

<wd l="8846" t="4781" r="9240" b="4973">tool,</wd>

<space/>

<wd l="9322" t="4829" r="9576" b="4939">we</wd>

<space/>

<wd l="9653" t="4781" r="10498" b="4939">evaluated</wd>

<space/>

</ln>

<ln l="6130" t="5030" r="10483" b="5232" baseLine="5184" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="5030" r="6331" b="5189">its</wd>

<space/>

<wd l="6437" t="5030" r="7027" b="5232">impact</wd>

<space/>

<wd l="7133" t="5078" r="7306" b="5189">as</wd>

<space/>

<wd l="7411" t="5078" r="7507" b="5189">a</wd>

<space/>

<wd l="7598" t="5030" r="8827" b="5232">preprocessing</wd>

<space/>

<wd l="8933" t="5050" r="9278" b="5232">step</wd>

<space/>

<wd l="9384" t="5030" r="9552" b="5184">in</wd>

<space/>

<wd l="9648" t="5050" r="9970" b="5189">two</wd>

<space/>

<wd l="10070" t="5035" r="10483" b="5189">NLP</wd>

<space/>

</ln>

<ln l="6125" t="5285" r="10157" b="5486" baseLine="5438" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="5285" r="6605" b="5443">tasks:</wd>

<space/>

<wd l="6682" t="5285" r="7080" b="5443">POS</wd>

<space/>

<wd l="7138" t="5285" r="7795" b="5486">tagging</wd>

<space/>

<wd l="7853" t="5285" r="8174" b="5443">and</wd>

<space/>

<wd l="8227" t="5285" r="8899" b="5486">opinion</wd>

<space/>

<wd l="8957" t="5285" r="10157" b="5443">classification.</wd>

</ln>

</para>

<para l="6125" t="5539" r="10488" b="7003" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="6350" t="5539" r="10483" b="5741" baseLine="5688" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="5544" r="6662" b="5698">For</wd>

<space/>

<wd l="6710" t="5539" r="6979" b="5698">the</wd>

<space/>

<wd l="7042" t="5539" r="7387" b="5698">first</wd>

<space/>

<wd l="7440" t="5539" r="7843" b="5731">task,</wd>

<space/>

<wd l="7901" t="5587" r="8155" b="5698">we</wd>

<space/>

<wd l="8213" t="5539" r="8621" b="5698">used</wd>

<space/>

<wd l="8669" t="5539" r="8938" b="5698">the</wd>

<space/>

<wd l="8990" t="5558" r="9542" b="5741">tagger</wd>

<space/>

<wd l="9595" t="5539" r="10483" b="5698">MXPOST</wd>

<space/>

</ln>

<ln l="6130" t="5794" r="10483" b="5995" baseLine="5942" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="5794" r="7320" b="5995">(Ratnaparkhi,</wd>

<space/>

<wd l="7406" t="5794" r="7944" b="5995">1996),</wd>

<space/>

<wd l="8002" t="5794" r="8616" b="5952">trained</wd>

<space/>

<wd l="8669" t="5794" r="8837" b="5947">in</wd>

<space/>

<wd l="8885" t="5794" r="9154" b="5952">the</wd>

<space/>

<wd l="9206" t="5794" r="10483" b="5995">MAC-Morpho</wd>

<space/>

</ln>

<ln l="6130" t="6043" r="10478" b="6245" baseLine="6197" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="6091" r="6706" b="6245">corpus</wd>

<space/>

<wd l="6792" t="6043" r="7128" b="6245">(1.2</wd>

<space/>

<wd l="7210" t="6043" r="7843" b="6202">million</wd>

<space/>

<wd l="7915" t="6043" r="8544" b="6235">tokens,</wd>

<space/>

<wd l="8621" t="6043" r="9269" b="6202">Aluisio</wd>

<space/>

<wd l="9350" t="6062" r="9504" b="6202">et</wd>

<space/>

<wd l="9581" t="6043" r="9840" b="6235">al.,</wd>

<space/>

<wd l="9926" t="6043" r="10478" b="6245">2003).</wd>

<space/>

</ln>

<ln l="6130" t="6298" r="10488" b="6499" baseLine="6446" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="6298" r="6466" b="6456">The</wd>

<space/>

<wd l="6610" t="6298" r="7114" b="6456">better</wd>

<space/>

<wd l="7258" t="6298" r="7997" b="6499">reported</wd>

<space/>

<wd l="8136" t="6298" r="8702" b="6456">results</wd>

<space/>

<wd l="8856" t="6298" r="9058" b="6456">of</wd>

<space/>

<wd l="9182" t="6298" r="10066" b="6456">MXPOST</wd>

<space/>

<wd l="10224" t="6346" r="10488" b="6456">are</wd>

<space/>

</ln>

<ln l="6130" t="6552" r="10483" b="6754" baseLine="6701" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="6552" r="6744" b="6710">around</wd>

<space/>

<wd l="6792" t="6552" r="7243" b="6744">97%,</wd>

<space/>

<wd l="7306" t="6552" r="7560" b="6710">for</wd>

<space/>

<wd l="7589" t="6552" r="8587" b="6754">journalistic</wd>

<space/>

<wd l="8650" t="6571" r="9115" b="6744">texts,</wd>

<space/>

<wd l="9173" t="6552" r="9442" b="6710">the</wd>

<space/>

<wd l="9504" t="6600" r="9941" b="6710">same</wd>

<space/>

<wd l="10003" t="6600" r="10483" b="6754">genre</wd>

<space/>

</ln>

<ln l="6125" t="6802" r="8179" b="7003" baseLine="6955" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="6802" r="6533" b="6960">used</wd>

<space/>

<wd l="6581" t="6821" r="6749" b="6960">to</wd>

<space/>

<wd l="6811" t="6802" r="7210" b="6960">train</wd>

<space/>

<wd l="7267" t="6802" r="7536" b="6960">the</wd>

<space/>

<wd l="7589" t="6821" r="8179" b="7003">tagger.</wd>

</ln>

</para>

<para l="6120" t="7056" r="10498" b="10042" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="6350" t="7056" r="10498" b="7258" baseLine="7210" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6350" t="7061" r="6662" b="7214">For</wd>

<space/>

<wd l="6710" t="7056" r="7022" b="7214">this</wd>

<space/>

<wd l="7085" t="7056" r="8117" b="7258">experiment,</wd>

<space/>

<wd l="8174" t="7104" r="8429" b="7214">we</wd>

<space/>

<wd l="8486" t="7056" r="8837" b="7214">first</wd>

<space/>

<wd l="8890" t="7056" r="9734" b="7258">randomly</wd>

<space/>

<wd l="9792" t="7056" r="10498" b="7214">selected</wd>

<space/>

</ln>

<ln l="6130" t="7310" r="10478" b="7512" baseLine="7459" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="7358" r="6226" b="7469">a</wd>

<space/>

<wd l="6283" t="7310" r="6898" b="7512">sample</wd>

<space/>

<wd l="6955" t="7310" r="7157" b="7469">of</wd>

<space/>

<wd l="7190" t="7330" r="7454" b="7469">ten</wd>

<space/>

<wd l="7512" t="7310" r="8189" b="7469">reviews</wd>

<space/>

<wd l="8251" t="7310" r="8674" b="7469">from</wd>

<space/>

<wd l="8722" t="7310" r="8990" b="7469">the</wd>

<space/>

<wd l="9048" t="7310" r="9792" b="7512">Buscapé</wd>

<space/>

<wd l="9854" t="7358" r="10478" b="7512">corpus.</wd>

<space/>

</ln>

<ln l="6130" t="7560" r="10493" b="7762" baseLine="7714" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="7560" r="6576" b="7718">Then</wd>

<space/>

<wd l="6658" t="7608" r="6912" b="7718">we</wd>

<space/>

<wd l="6998" t="7560" r="7584" b="7762">tagged</wd>

<space/>

<wd l="7661" t="7560" r="7930" b="7718">the</wd>

<space/>

<wd l="8026" t="7560" r="8635" b="7762">sample</wd>

<space/>

<wd l="8722" t="7560" r="9115" b="7718">with</wd>

<space/>

<wd l="9197" t="7560" r="10080" b="7718">MXPOST</wd>

<space/>

<wd l="10171" t="7560" r="10493" b="7718">and</wd>

<space/>

</ln>

<ln l="6120" t="7814" r="10488" b="8016" baseLine="7968" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6120" t="7814" r="7042" b="8016">performed</wd>

<space/>

<wd l="7094" t="7862" r="7190" b="7973">a</wd>

<space/>

<wd l="7238" t="7814" r="8045" b="8016">linguistic</wd>

<space/>

<wd l="8102" t="7814" r="8808" b="7973">revision</wd>

<space/>

<wd l="8866" t="7814" r="9067" b="7973">of</wd>

<space/>

<wd l="9091" t="7814" r="9360" b="7973">the</wd>

<space/>

<wd l="9408" t="7814" r="9806" b="7973">POS</wd>

<space/>

<wd l="9859" t="7834" r="10262" b="8016">tags,</wd>

<space/>

<wd l="10325" t="7814" r="10488" b="7968">in</wd>

<space/>

</ln>

<ln l="6130" t="8069" r="10493" b="8270" baseLine="8218" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="8069" r="6590" b="8227">order</wd>

<space/>

<wd l="6754" t="8088" r="6922" b="8227">to</wd>

<space/>

<wd l="7094" t="8088" r="7618" b="8227">create</wd>

<space/>

<wd l="7790" t="8117" r="7886" b="8227">a</wd>

<space/>

<wd l="8054" t="8069" r="9269" b="8270">gold-standard</wd>

<space/>

<wd l="9427" t="8069" r="10493" b="8270">POS-tagged</wd>

<space/>

</ln>

<ln l="6125" t="8323" r="10483" b="8525" baseLine="8472" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="8323" r="6773" b="8482">version</wd>

<space/>

<wd l="6878" t="8323" r="7080" b="8482">of</wd>

<space/>

<wd l="7157" t="8323" r="7426" b="8482">the</wd>

<space/>

<wd l="7541" t="8323" r="8198" b="8525">sample.</wd>

<space/>

<wd l="8323" t="8323" r="9542" b="8525">Subsequently,</wd>

<space/>

<wd l="9653" t="8371" r="9907" b="8482">we</wd>

<space/>

<wd l="10013" t="8323" r="10483" b="8482">POS-</wd>

</ln>

<ln l="6125" t="8573" r="10488" b="8774" baseLine="8726" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="8573" r="6715" b="8774">tagged</wd>

<space/>

<wd l="6869" t="8573" r="7306" b="8731">three</wd>

<space/>

<wd l="7469" t="8573" r="8222" b="8731">different</wd>

<space/>

<wd l="8381" t="8573" r="9110" b="8731">versions</wd>

<space/>

<wd l="9274" t="8573" r="9475" b="8731">of</wd>

<space/>

<wd l="9614" t="8573" r="9878" b="8731">the</wd>

<space/>

<wd l="10046" t="8621" r="10488" b="8731">same</wd>

<space/>

</ln>

<ln l="6134" t="8827" r="10493" b="9029" baseLine="8981" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6134" t="8827" r="6792" b="9029">sample:</wd>

<space/>

<wd l="6888" t="8827" r="7037" b="9029">1)</wd>

<space/>

<wd l="7099" t="8827" r="7363" b="8986">the</wd>

<space/>

<wd l="7426" t="8827" r="8098" b="9029">original</wd>

<space/>

<wd l="8160" t="8875" r="8520" b="9019">one;</wd>

<space/>

<wd l="8592" t="8827" r="8765" b="9029">2)</wd>

<space/>

<wd l="8827" t="8875" r="8923" b="8986">a</wd>

<space/>

<wd l="8971" t="8827" r="9619" b="8986">version</wd>

<space/>

<wd l="9672" t="8827" r="10493" b="9029">manually</wd>

<space/>

</ln>

<ln l="6125" t="9082" r="10493" b="9283" baseLine="9230" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="9082" r="7162" b="9274">normalized,</wd>

<space/>

<wd l="7349" t="9082" r="7670" b="9240">and</wd>

<space/>

<wd l="7843" t="9082" r="8016" b="9283">3)</wd>

<space/>

<wd l="8203" t="9130" r="8299" b="9240">a</wd>

<space/>

<wd l="8472" t="9082" r="9120" b="9240">version</wd>

<space/>

<wd l="9298" t="9082" r="10493" b="9283">automatically</wd>

<space/>

</ln>

<ln l="6125" t="9331" r="10488" b="9533" baseLine="9485" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="9331" r="7118" b="9490">normalized</wd>

<space/>

<wd l="7205" t="9331" r="7430" b="9533">by</wd>

<space/>

<wd l="7526" t="9331" r="8712" b="9490">UGCNormal.</wd>

<space/>

<wd l="8827" t="9331" r="9163" b="9490">The</wd>

<space/>

<wd l="9264" t="9331" r="9835" b="9490">results</wd>

<space/>

<wd l="9946" t="9331" r="10142" b="9490">of</wd>

<space/>

<wd l="10219" t="9331" r="10488" b="9490">the</wd>

<space/>

</ln>

<ln l="6125" t="9586" r="10493" b="9787" baseLine="9739" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="9586" r="6562" b="9744">three</wd>

<space/>

<wd l="6614" t="9586" r="7339" b="9744">versions</wd>

<space/>

<wd l="7402" t="9586" r="8242" b="9744">evaluated</wd>

<space/>

<wd l="8294" t="9586" r="8909" b="9787">against</wd>

<space/>

<wd l="8957" t="9586" r="9226" b="9744">the</wd>

<space/>

<wd l="9283" t="9586" r="10493" b="9787">gold-standard</wd>

<space/>

</ln>

<ln l="6125" t="9840" r="9307" b="10042" baseLine="9989" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="9840" r="6773" b="9998">version</wd>

<space/>

<wd l="6830" t="9888" r="7094" b="9998">are</wd>

<space/>

<wd l="7147" t="9840" r="7997" b="10042">presented</wd>

<space/>

<wd l="8054" t="9840" r="8222" b="9994">in</wd>

<space/>

<wd l="8275" t="9840" r="8544" b="9998">the</wd>

<space/>

<wd l="8602" t="9840" r="9096" b="9998">Table</wd>

<space/>

<wd l="9158" t="9840" r="9307" b="9998">3.</wd>

</ln>

</para>

<para l="6125" t="10368" r="10488" b="10819" alignment="justified" spaceBefore="272" lsp="exactly" lspExact="248" language="en">

<ln l="6130" t="10368" r="10488" b="10570" baseLine="10522" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="10368" r="6624" b="10526">Table</wd>

<space/>

<wd l="6691" t="10368" r="6840" b="10526">3:</wd>

<space/>

<wd l="6922" t="10368" r="7262" b="10526">The</wd>

<space/>

<wd l="7325" t="10368" r="7997" b="10526">number</wd>

<space/>

<wd l="8064" t="10368" r="8261" b="10526">of</wd>

<space/>

<wd l="8309" t="10387" r="8914" b="10526">correct</wd>

<space/>

<wd l="8976" t="10387" r="9326" b="10570">tags</wd>

<space/>

<wd l="9389" t="10368" r="10214" b="10570">produced</wd>

<space/>

<wd l="10262" t="10368" r="10488" b="10570">by</wd>

<space/>

</ln>

<ln l="6125" t="10622" r="9250" b="10819" baseLine="10771" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="10622" r="6394" b="10781">the</wd>

<space/>

<wd l="6446" t="10642" r="7046" b="10819">tagger,</wd>

<space/>

<wd l="7114" t="10622" r="7368" b="10781">for</wd>

<space/>

<wd l="7421" t="10622" r="7824" b="10781">each</wd>

<space/>

<wd l="7886" t="10622" r="8496" b="10819">sample</wd>

<space/>

<wd l="8554" t="10622" r="9250" b="10781">version.</wd>

</ln>

</para>

<table l="6120" t="10824" r="10488" b="13176" alignment="left" li="6" ri="37" spaceAfter="274">

<leftBorder type="single" width="19"/>

<topBorder type="single" width="19"/>

<rightBorder type="single" width="19"/>

<bottomBorder type="single" width="19"/>

<gridTable>

<gridCol>998</gridCol>

<gridCol>1047</gridCol>

<gridCol>1157</gridCol>

<gridCol>1166</gridCol>

<gridRow>859</gridRow>

<gridRow>639</gridRow>

<gridRow>854</gridRow>

</gridTable>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="19"/>

<topBorder type="single" width="19"/>

<rightBorder type="single" width="19"/>

<bottomBorder type="single" width="19"/>

<para l="6120" t="10824" r="7118" b="11683" language="en">

<ln l="0" t="0" r="0" b="0" baseLine="0" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable">

<nl orig="true"/>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="19"/>

<topBorder type="single" width="19"/>

<rightBorder type="single" width="19"/>

<bottomBorder type="single" width="19"/>

<para l="7272" t="10982" r="7987" b="11323" alignment="centered" spaceBefore="129" spaceAfter="309" lsp="exactly" lspExact="208" language="en">

<ln l="7344" t="10982" r="7939" b="11117" baseLine="11107" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="7344" t="10982" r="7939" b="11117">Without
</wd>

</ln>

<ln l="7272" t="11189" r="7987" b="11323" baseLine="11318" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="7272" t="11189" r="7987" b="11323">Normaliz.</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="19"/>

<topBorder type="single" width="19"/>

<rightBorder type="single" width="19"/>

<bottomBorder type="single" width="19"/>

<para l="8376" t="10987" r="9091" b="11530" alignment="centered" spaceBefore="128" spaceAfter="102" lsp="exactly" lspExact="208" language="en">

<ln l="8554" t="10987" r="8933" b="11117" baseLine="11107" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="8554" t="10987" r="8933" b="11117">After
</wd>

</ln>

<ln l="8477" t="11198" r="9005" b="11323" baseLine="11318" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="8477" t="11198" r="9000" b="11323">Human
</wd>

</ln>

<ln l="8376" t="11395" r="9091" b="11530" baseLine="11525" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="8376" t="11395" r="9091" b="11530">Normaliz.</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="19"/>

<topBorder type="single" width="19"/>

<rightBorder type="single" width="19"/>

<bottomBorder type="single" width="19"/>

<para l="9523" t="10987" r="10277" b="11530" alignment="centered" spaceBefore="128" spaceAfter="102" lsp="exactly" lspExact="208" language="en">

<ln l="9715" t="10987" r="10094" b="11117" baseLine="11107" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="9715" t="10987" r="10094" b="11117">After
</wd>

</ln>

<ln l="9523" t="11189" r="10277" b="11323" baseLine="11318" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="9523" t="11189" r="10277" b="11323">Automatic
</wd>

</ln>

<ln l="9533" t="11395" r="10248" b="11530" baseLine="11525" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="9533" t="11395" r="10248" b="11530">Normaliz.</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="19"/>

<topBorder type="single" width="19"/>

<rightBorder type="single" width="19"/>

<bottomBorder type="single" width="19"/>

<para l="6360" t="11808" r="6898" b="12206" alignment="centered" spaceBefore="115" spaceAfter="98" lsp="exactly" lspExact="208" language="en">

<ln l="6360" t="11808" r="6898" b="11962" baseLine="11957" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="6360" t="11832" r="6898" b="11962">Correct
</wd>

</ln>

<ln l="6480" t="12038" r="6763" b="12206" baseLine="12163" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="6480" t="12053" r="6763" b="12206">tags</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="19"/>

<topBorder type="single" width="19"/>

<rightBorder type="single" width="19"/>

<bottomBorder type="single" width="19"/>

<para l="7445" t="11842" r="7858" b="12000" alignment="left" li="244" spaceBefore="111" spaceAfter="265" lsp="exactly" lspExact="253" language="en">

<ln l="7445" t="11842" r="7858" b="12000" baseLine="11990" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="7445" t="11842" r="7858" b="12000">1120</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="19"/>

<topBorder type="single" width="19"/>

<rightBorder type="single" width="19"/>

<bottomBorder type="single" width="19"/>

<para l="8549" t="11842" r="8952" b="12000" alignment="centered" spaceBefore="111" spaceAfter="265" lsp="exactly" lspExact="253" language="en">

<ln l="8549" t="11842" r="8952" b="12000" baseLine="11990" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-7">

<wd l="8549" t="11842" r="8952" b="12000">1145</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="19"/>

<topBorder type="single" width="19"/>

<rightBorder type="single" width="19"/>

<bottomBorder type="single" width="19"/>

<para l="9706" t="11842" r="10118" b="11995" alignment="centered" spaceBefore="111" spaceAfter="265" lsp="exactly" lspExact="253" language="en">

<ln l="9706" t="11842" r="10118" b="11995" baseLine="11990" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="9706" t="11842" r="10118" b="11995">1142</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="19"/>

<topBorder type="single" width="19"/>

<rightBorder type="single" width="19"/>

<bottomBorder type="single" width="19"/>

<para l="6278" t="12470" r="6970" b="12638" alignment="centered" spaceBefore="116" lsp="exactly" lspExact="207" language="en">

<ln l="6278" t="12470" r="6970" b="12638" baseLine="12595" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6278" t="12470" r="6970" b="12638">Accuracy</wd>

</ln>

</para>

<para l="6600" t="12682" r="6648" b="12802" alignment="centered" lsp="exactly" lspExact="207" language="en">

<ln l="6600" t="12682" r="6648" b="12802" baseLine="12802" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6600" t="12754" r="6648" b="12768">-</wd>

</ln>

</para>

<para l="6259" t="12888" r="6984" b="13018" alignment="centered" spaceAfter="117" lsp="exactly" lspExact="207" language="en">

<ln l="6259" t="12888" r="6984" b="13018" baseLine="13008" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6259" t="12888" r="6984" b="13018">MXPOST</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="19"/>

<topBorder type="single" width="19"/>

<rightBorder type="single" width="19"/>

<bottomBorder type="single" width="19"/>

<para l="7306" t="12480" r="7978" b="12643" alignment="centered" spaceBefore="111" spaceAfter="490" lsp="exactly" lspExact="253" language="en">

<ln l="7306" t="12480" r="7978" b="12643" baseLine="12629" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="7306" t="12480" r="7978" b="12643">91.35%</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="19"/>

<topBorder type="single" width="19"/>

<rightBorder type="single" width="19"/>

<bottomBorder type="single" width="19"/>

<para l="8410" t="12480" r="9077" b="12643" alignment="centered" spaceBefore="111" spaceAfter="490" lsp="exactly" lspExact="253" language="en">

<ln l="8410" t="12480" r="9077" b="12643" baseLine="12629" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="8410" t="12480" r="9077" b="12643">93.39%</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="top">

<leftBorder type="single" width="19"/>

<topBorder type="single" width="19"/>

<rightBorder type="single" width="19"/>

<bottomBorder type="single" width="19"/>

<para l="9566" t="12480" r="10238" b="12643" alignment="centered" spaceBefore="111" spaceAfter="490" lsp="exactly" lspExact="253" language="en">

<ln l="9566" t="12480" r="10238" b="12643" baseLine="12629" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="9566" t="12480" r="10238" b="12643">93.15%</wd>

</ln>

</para>

</cell>

</table>

<para l="6125" t="13498" r="10507" b="14918" alignment="justified" spaceBefore="3" fli="216" lsp="exactly" lspExact="251" language="en">

<ln l="6355" t="13498" r="10483" b="13699" baseLine="13646" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="13498" r="6696" b="13656">The</wd>

<space/>

<wd l="6773" t="13546" r="7550" b="13699">accuracy</wd>

<space/>

<wd l="7618" t="13498" r="8179" b="13656">values</wd>

<space/>

<wd l="8261" t="13546" r="8520" b="13656">are</wd>

<space/>

<wd l="8597" t="13498" r="8861" b="13656">the</wd>

<space/>

<wd l="8938" t="13498" r="9336" b="13656">ratio</wd>

<space/>

<wd l="9408" t="13498" r="10147" b="13656">between</wd>

<space/>

<wd l="10219" t="13498" r="10483" b="13656">the</wd>

<space/>

</ln>

<ln l="6125" t="13747" r="10507" b="13949" baseLine="13901" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="13747" r="6797" b="13906">number</wd>

<space/>

<wd l="6888" t="13747" r="7090" b="13906">of</wd>

<space/>

<wd l="7162" t="13766" r="7766" b="13906">correct</wd>

<space/>

<wd l="7858" t="13766" r="8203" b="13949">tags</wd>

<space/>

<wd l="8304" t="13747" r="8621" b="13906">and</wd>

<space/>

<wd l="8702" t="13747" r="8971" b="13906">the</wd>

<space/>

<wd l="9062" t="13747" r="9451" b="13906">total</wd>

<space/>

<wd l="9542" t="13747" r="10214" b="13906">number</wd>

<space/>

<wd l="10306" t="13747" r="10507" b="13906">of</wd>

<space/>

</ln>

<ln l="6125" t="14002" r="10488" b="14203" baseLine="14155" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="14021" r="6475" b="14203">tags</wd>

<space/>

<wd l="6696" t="14002" r="7320" b="14203">(1226).</wd>

<space/>

<wd l="7546" t="14002" r="7882" b="14160">The</wd>

<space/>

<wd l="8093" t="14002" r="8582" b="14160">result</wd>

<space/>

<wd l="8798" t="14002" r="9581" b="14160">achieved</wd>

<space/>

<wd l="9782" t="14002" r="10008" b="14203">by</wd>

<space/>

<wd l="10219" t="14002" r="10488" b="14160">the</wd>

<space/>

</ln>

<ln l="6130" t="14256" r="10478" b="14458" baseLine="14405" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="14256" r="7320" b="14458">automatically</wd>

<space/>

<wd l="7402" t="14256" r="8395" b="14414">normalized</wd>

<space/>

<wd l="8472" t="14256" r="9125" b="14414">version</wd>

<space/>

<wd l="9211" t="14256" r="10478" b="14458">(UGCNormal)</wd>

<space/>

</ln>

<ln l="6130" t="14506" r="10488" b="14707" baseLine="14659" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="14506" r="6269" b="14664">is</wd>

<space/>

<wd l="6336" t="14506" r="6917" b="14664">almost</wd>

<space/>

<wd l="6974" t="14506" r="7243" b="14664">the</wd>

<space/>

<wd l="7310" t="14554" r="7747" b="14664">same</wd>

<space/>

<wd l="7819" t="14554" r="7992" b="14664">as</wd>

<space/>

<wd l="8054" t="14506" r="8386" b="14664">that</wd>

<space/>

<wd l="8448" t="14506" r="9230" b="14664">achieved</wd>

<space/>

<wd l="9278" t="14506" r="9504" b="14707">by</wd>

<space/>

<wd l="9562" t="14506" r="9830" b="14664">the</wd>

<space/>

<wd l="9893" t="14506" r="10488" b="14664">human</wd>

<space/>

</ln>

<ln l="6125" t="14760" r="7858" b="14918" baseLine="14914" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="14760" r="7118" b="14918">normalized</wd>

<space/>

<wd l="7166" t="14760" r="7858" b="14918">version.</wd>

</ln>

</para>

</column>

</section>

<dd l="1403" t="15736" r="10525" b="15977">

<para l="5800" t="15792" r="6148" b="15941" alignment="centered" spaceBefore="4" lsp="exactly" lspExact="229" language="en">

<ln l="5866" t="15792" r="6082" b="15941" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="43">

<wd l="5866" t="15792" r="6082" b="15941">44</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4305.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1402" marginTop="1417" marginRight="1385" marginBottom="1302" offsetX="2" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1402" t="1417" r="10524" b="15360">

<column l="1402" t="1417" r="5813" b="15360">

<para l="1416" t="1464" r="5789" b="3691" alignment="justified" spaceBefore="2" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="1646" t="1464" r="5779" b="1622" baseLine="1613" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="1469" r="1949" b="1622">We</wd>

<space/>

<wd l="2122" t="1464" r="2530" b="1622">have</wd>

<space/>

<wd l="2707" t="1464" r="3053" b="1622">also</wd>

<space/>

<wd l="3226" t="1464" r="3696" b="1622">made</wd>

<space/>

<wd l="3874" t="1512" r="3970" b="1622">a</wd>

<space/>

<wd l="4133" t="1483" r="4440" b="1622">test</wd>

<space/>

<wd l="4608" t="1464" r="4810" b="1622">of</wd>

<space/>

<wd l="4963" t="1464" r="5779" b="1622">statistical</wd>

<space/>

</ln>

<ln l="1430" t="1718" r="5789" b="1920" baseLine="1867" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="1718" r="2482" b="1920">significance</wd>

<space/>

<wd l="2554" t="1738" r="2722" b="1877">to</wd>

<space/>

<wd l="2803" t="1718" r="3528" b="1877">evaluate</wd>

<space/>

<wd l="3600" t="1718" r="3869" b="1877">the</wd>

<space/>

<wd l="3931" t="1718" r="4906" b="1920">probability</wd>

<space/>

<wd l="4978" t="1718" r="5314" b="1877">that</wd>

<space/>

<wd l="5390" t="1718" r="5789" b="1877">such</wd>

<space/>

</ln>

<ln l="1426" t="1968" r="5784" b="2170" baseLine="2122" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="1968" r="2592" b="2170">improvement</wd>

<space/>

<wd l="2678" t="1968" r="2846" b="2122">in</wd>

<space/>

<wd l="2928" t="1968" r="3192" b="2126">the</wd>

<space/>

<wd l="3278" t="1987" r="3830" b="2170">tagger</wd>

<space/>

<wd l="3902" t="1968" r="4718" b="2170">precision</wd>

<space/>

<wd l="4805" t="1968" r="5294" b="2126">could</wd>

<space/>

<wd l="5371" t="1968" r="5784" b="2126">have</wd>

<space/>

</ln>

<ln l="1416" t="2222" r="5779" b="2424" baseLine="2376" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1416" t="2222" r="1834" b="2381">been</wd>

<space/>

<wd l="1920" t="2222" r="2674" b="2381">obtained</wd>

<space/>

<wd l="2741" t="2222" r="2966" b="2424">by</wd>

<space/>

<wd l="3043" t="2222" r="3696" b="2381">chance.</wd>

<space/>

<wd l="3787" t="2222" r="4320" b="2381">Given</wd>

<space/>

<wd l="4397" t="2222" r="4666" b="2381">the</wd>

<space/>

<wd l="4752" t="2222" r="5362" b="2424">sample</wd>

<space/>

<wd l="5453" t="2222" r="5779" b="2381">size</wd>

<space/>

</ln>

<ln l="1426" t="2477" r="5784" b="2635" baseLine="2626" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="2477" r="1742" b="2635">and</wd>

<space/>

<wd l="2026" t="2525" r="2477" b="2635">some</wd>

<space/>

<wd l="2755" t="2477" r="3466" b="2635">relevant</wd>

<space/>

<wd l="3749" t="2477" r="5011" b="2635">considerations</wd>

<space/>

<wd l="5294" t="2477" r="5784" b="2635">while</wd>

<space/>

</ln>

<ln l="1426" t="2726" r="5779" b="2928" baseLine="2880" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="2726" r="2338" b="2928">evaluating</wd>

<space/>

<wd l="2410" t="2731" r="2822" b="2885">NLP</wd>

<space/>

<wd l="2909" t="2726" r="3341" b="2885">tasks</wd>

<space/>

<wd l="3427" t="2726" r="4224" b="2928">(Sogaard</wd>

<space/>

<wd l="4301" t="2746" r="4454" b="2885">et</wd>

<space/>

<wd l="4536" t="2726" r="4795" b="2918">al.,</wd>

<space/>

<wd l="4882" t="2726" r="5438" b="2928">2014),</wd>

<space/>

<wd l="5525" t="2774" r="5779" b="2885">we</wd>

<space/>

</ln>

<ln l="1426" t="2981" r="5784" b="3182" baseLine="3134" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="2981" r="1915" b="3182">opted</wd>

<space/>

<wd l="2078" t="2981" r="2333" b="3139">for</wd>

<space/>

<wd l="2496" t="2981" r="2765" b="3139">the</wd>

<space/>

<wd l="2933" t="2981" r="4272" b="3182">non-parametric</wd>

<space/>

<wd l="4445" t="3000" r="4752" b="3139">test</wd>

<space/>

<wd l="4915" t="2981" r="5784" b="3139">Wilcoxon</wd>

<space/>

</ln>

<ln l="1430" t="3235" r="5779" b="3437" baseLine="3384" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="3235" r="2606" b="3437">Signed-Rank.</wd>

<space/>

<wd l="2664" t="3240" r="2971" b="3394">We</wd>

<space/>

<wd l="3024" t="3235" r="3821" b="3394">observed</wd>

<space/>

<wd l="3864" t="3283" r="3960" b="3394">a</wd>

<space/>

<wd l="4013" t="3235" r="5069" b="3437">significance</wd>

<space/>

<wd l="5122" t="3235" r="5323" b="3394">of</wd>

<space/>

<wd l="5352" t="3235" r="5779" b="3427">0.05,</wd>

<space/>

</ln>

<ln l="1421" t="3490" r="4517" b="3691" baseLine="3638" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="3490" r="1685" b="3648">the</wd>

<space/>

<wd l="1738" t="3490" r="2400" b="3691">p-value</wd>

<space/>

<wd l="2453" t="3490" r="2947" b="3691">being</wd>

<space/>

<wd l="3005" t="3490" r="3475" b="3691">equal</wd>

<space/>

<wd l="3533" t="3509" r="3701" b="3648">to</wd>

<space/>

<wd l="3763" t="3490" r="4517" b="3648">0.02249.</wd>

</ln>

</para>

<para l="1416" t="3739" r="5789" b="6427" alignment="justified" spaceBefore="1" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="1651" t="3739" r="5789" b="3898" baseLine="3893" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1651" t="3739" r="1987" b="3898">The</wd>

<space/>

<wd l="2093" t="3739" r="2544" b="3898">other</wd>

<space/>

<wd l="2645" t="3739" r="3394" b="3898">extrinsic</wd>

<space/>

<wd l="3504" t="3739" r="4421" b="3898">evaluation</wd>

<space/>

<wd l="4522" t="3739" r="4661" b="3898">is</wd>

<space/>

<wd l="4766" t="3739" r="5275" b="3898">based</wd>

<space/>

<wd l="5376" t="3787" r="5592" b="3898">on</wd>

<space/>

<wd l="5693" t="3787" r="5789" b="3898">a</wd>

<space/>

</ln>

<ln l="1426" t="3994" r="5789" b="4195" baseLine="4142" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="3994" r="2645" b="4152">lexicon-based</wd>

<space/>

<wd l="2798" t="3994" r="3466" b="4195">opinion</wd>

<space/>

<wd l="3619" t="3994" r="4411" b="4152">classifier</wd>

<space/>

<wd l="4560" t="3994" r="5314" b="4195">(Avanço</wd>

<space/>

<wd l="5472" t="3994" r="5789" b="4152">and</wd>

<space/>

</ln>

<ln l="1416" t="4248" r="5779" b="4450" baseLine="4397" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="4253" r="2030" b="4440">Nunes,</wd>

<space/>

<wd l="2165" t="4248" r="2722" b="4450">2014),</wd>

<space/>

<wd l="2851" t="4248" r="3389" b="4406">which</wd>

<space/>

<wd l="3514" t="4248" r="4138" b="4450">assigns</wd>

<space/>

<wd l="4262" t="4248" r="4954" b="4450">polarity</wd>

<space/>

<wd l="5074" t="4267" r="5242" b="4406">to</wd>

<space/>

<wd l="5366" t="4267" r="5779" b="4406">texts</wd>

<space/>

</ln>

<ln l="1426" t="4498" r="5784" b="4699" baseLine="4651" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="4498" r="2237" b="4699">(positive,</wd>

<space/>

<wd l="2352" t="4498" r="3096" b="4699">negative</wd>

<space/>

<wd l="3211" t="4546" r="3394" b="4656">or</wd>

<space/>

<wd l="3499" t="4498" r="4229" b="4699">neutral).</wd>

<space/>

<wd l="4349" t="4502" r="4651" b="4656">We</wd>

<space/>

<wd l="4766" t="4498" r="5414" b="4699">applied</wd>

<space/>

<wd l="5515" t="4498" r="5784" b="4656">the</wd>

<space/>

</ln>

<ln l="1426" t="4752" r="5779" b="4954" baseLine="4906" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="4752" r="2218" b="4910">classifier</wd>

<space/>

<wd l="2304" t="4800" r="2520" b="4910">on</wd>

<space/>

<wd l="2606" t="4800" r="2702" b="4910">a</wd>

<space/>

<wd l="2794" t="4752" r="3403" b="4954">sample</wd>

<space/>

<wd l="3494" t="4752" r="3696" b="4910">of</wd>

<space/>

<wd l="3782" t="4752" r="4349" b="4944">13,685</wd>

<space/>

<wd l="4445" t="4752" r="5126" b="4910">reviews</wd>

<space/>

<wd l="5218" t="4752" r="5779" b="4954">(6,812</wd>

<space/>

</ln>

<ln l="1416" t="5006" r="5779" b="5208" baseLine="5155" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="5006" r="2198" b="5208">positives</wd>

<space/>

<wd l="2266" t="5006" r="2587" b="5165">and</wd>

<space/>

<wd l="2650" t="5006" r="3120" b="5198">6,873</wd>

<space/>

<wd l="3197" t="5006" r="4094" b="5208">negatives)</wd>

<space/>

<wd l="4166" t="5006" r="4968" b="5165">extracted</wd>

<space/>

<wd l="5030" t="5006" r="5453" b="5165">from</wd>

<space/>

<wd l="5510" t="5006" r="5779" b="5165">the</wd>

<space/>

</ln>

<ln l="1421" t="5261" r="5789" b="5462" baseLine="5410" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="5261" r="2165" b="5462">Buscapé</wd>

<space/>

<wd l="2213" t="5309" r="2842" b="5462">corpus,</wd>

<space/>

<wd l="2890" t="5261" r="3451" b="5419">before</wd>

<space/>

<wd l="3499" t="5261" r="3816" b="5419">and</wd>

<space/>

<wd l="3859" t="5261" r="4262" b="5419">after</wd>

<space/>

<wd l="4301" t="5261" r="5525" b="5419">normalization</wd>

<space/>

<wd l="5563" t="5261" r="5789" b="5462">by</wd>

<space/>

</ln>

<ln l="1421" t="5510" r="5779" b="5712" baseLine="5664" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="5510" r="2597" b="5669">UGCNormal.</wd>

<space/>

<wd l="2688" t="5510" r="3024" b="5669">The</wd>

<space/>

<wd l="3106" t="5558" r="3782" b="5712">average</wd>

<space/>

<wd l="3864" t="5510" r="4066" b="5669">of</wd>

<space/>

<wd l="4118" t="5510" r="4891" b="5669">F1-score</wd>

<space/>

<wd l="4968" t="5558" r="5779" b="5669">measures</wd>

<space/>

</ln>

<ln l="1426" t="5765" r="5779" b="5966" baseLine="5914" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="5765" r="2261" b="5966">(F1-score</wd>

<space/>

<wd l="2371" t="5765" r="3072" b="5966">positive</wd>

<space/>

<wd l="3192" t="5765" r="3514" b="5923">and</wd>

<space/>

<wd l="3624" t="5765" r="4387" b="5923">F1-score</wd>

<space/>

<wd l="4507" t="5765" r="5318" b="5966">negative)</wd>

<space/>

<wd l="5443" t="5813" r="5779" b="5923">was</wd>

<space/>

</ln>

<ln l="1426" t="6019" r="5789" b="6211" baseLine="6168" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="6019" r="1910" b="6178">0.736</wd>

<space/>

<wd l="2030" t="6019" r="2285" b="6178">for</wd>

<space/>

<wd l="2395" t="6019" r="3792" b="6178">non-normalized</wd>

<space/>

<wd l="3902" t="6038" r="4368" b="6211">texts,</wd>

<space/>

<wd l="4493" t="6019" r="4814" b="6178">and</wd>

<space/>

<wd l="4925" t="6019" r="5410" b="6178">0.758</wd>

<space/>

<wd l="5534" t="6019" r="5789" b="6178">for</wd>

<space/>

</ln>

<ln l="1421" t="6269" r="2923" b="6427" baseLine="6422" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="6269" r="2414" b="6427">normalized</wd>

<space/>

<wd l="2462" t="6288" r="2923" b="6427">texts.</wd>

</ln>

</para>

<para l="1421" t="6523" r="5808" b="8242" alignment="justified" spaceBefore="1" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="1651" t="6523" r="5784" b="6725" baseLine="6677" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1651" t="6523" r="1987" b="6682">The</wd>

<space/>

<wd l="2088" t="6523" r="3202" b="6725">performance</wd>

<space/>

<wd l="3307" t="6523" r="3509" b="6682">of</wd>

<space/>

<wd l="3595" t="6571" r="3691" b="6682">a</wd>

<space/>

<wd l="3792" t="6523" r="5021" b="6682">lexicon-based</wd>

<space/>

<wd l="5122" t="6523" r="5784" b="6725">opinion</wd>

<space/>

</ln>

<ln l="1426" t="6778" r="5808" b="6979" baseLine="6926" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="6778" r="2218" b="6936">classifier</wd>

<space/>

<wd l="2261" t="6778" r="2400" b="6936">is</wd>

<space/>

<wd l="2453" t="6778" r="3014" b="6979">highly</wd>

<space/>

<wd l="3062" t="6778" r="3965" b="6979">dependent</wd>

<space/>

<wd l="4013" t="6778" r="4214" b="6936">of</wd>

<space/>

<wd l="4238" t="6778" r="4507" b="6936">the</wd>

<space/>

<wd l="4550" t="6778" r="5554" b="6979">recognition</wd>

<space/>

<wd l="5606" t="6778" r="5808" b="6936">of</wd>

<space/>

</ln>

<ln l="1430" t="7027" r="5779" b="7186" baseLine="7181" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="7027" r="2275" b="7186">sentiment</wd>

<space/>

<wd l="2424" t="7027" r="2962" b="7186">words</wd>

<space/>

<wd l="3120" t="7027" r="3288" b="7181">in</wd>

<space/>

<wd l="3437" t="7027" r="3706" b="7186">the</wd>

<space/>

<wd l="3854" t="7046" r="4229" b="7186">text.</wd>

<space/>

<wd l="4392" t="7027" r="4632" b="7186">As</wd>

<space/>

<wd l="4795" t="7075" r="5294" b="7186">errors</wd>

<space/>

<wd l="5458" t="7027" r="5779" b="7186">like</wd>

<space/>

</ln>

<ln l="1426" t="7282" r="5784" b="7483" baseLine="7435" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="7282" r="2347" b="7440">“exelente”</wd>

<space/>

<wd l="2525" t="7282" r="4406" b="7483">(excelente=excellent)</wd>

<space/>

<wd l="4589" t="7282" r="4910" b="7440">and</wd>

<space/>

<wd l="5078" t="7282" r="5784" b="7440">“otimo”</wd>

<space/>

</ln>

<ln l="1426" t="7536" r="5789" b="7738" baseLine="7685" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="7536" r="2635" b="7738">(ótimo=great)</wd>

<space/>

<wd l="2971" t="7584" r="3230" b="7694">are</wd>

<space/>

<wd l="3557" t="7584" r="3950" b="7738">very</wd>

<space/>

<wd l="4277" t="7536" r="5054" b="7738">frequent,</wd>

<space/>

<wd l="5395" t="7536" r="5789" b="7694">such</wd>

<space/>

</ln>

<ln l="1426" t="7790" r="5774" b="7992" baseLine="7939" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="7790" r="2592" b="7992">improvement</wd>

<space/>

<wd l="2640" t="7790" r="2808" b="7944">in</wd>

<space/>

<wd l="2846" t="7790" r="3115" b="7949">the</wd>

<space/>

<wd l="3154" t="7790" r="4013" b="7992">precision,</wd>

<space/>

<wd l="4066" t="7790" r="4469" b="7949">after</wd>

<space/>

<wd l="4507" t="7790" r="5774" b="7982">normalization,</wd>

<space/>

</ln>

<ln l="1421" t="8040" r="2640" b="8242" baseLine="8194" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="8088" r="1757" b="8198">was</wd>

<space/>

<wd l="1819" t="8040" r="2640" b="8242">expected.</wd>

</ln>

</para>

<para l="1426" t="8458" r="5722" b="8616" alignment="left" spaceBefore="167" lsp="exactly" lspExact="249" language="en">

<ln l="1426" t="8458" r="5722" b="8616" baseLine="8611" bold="true" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="8458" r="1685" b="8616">5.3</wd>

<space/>

<wd l="1790" t="8458" r="2290" b="8616">Some</wd>

<space/>

<wd l="2347" t="8458" r="3346" b="8616">limitations</wd>

<space/>

<wd l="3408" t="8458" r="3605" b="8616">of</wd>

<space/>

<wd l="3643" t="8462" r="3936" b="8616">the</wd>

<space/>

<wd l="3998" t="8458" r="5309" b="8616">normalization</wd>

<space/>

<wd l="5366" t="8462" r="5722" b="8616">tool</wd>

</ln>

</para>

<para l="1416" t="8827" r="5794" b="12317" alignment="justified" spaceBefore="117" lsp="exactly" lspExact="252" language="en">

<ln l="1426" t="8827" r="5789" b="8986" baseLine="8981" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="8827" r="1762" b="8986">The</wd>

<space/>

<wd l="1944" t="8827" r="3072" b="8986">UGCNormal</wd>

<space/>

<wd l="3264" t="8846" r="3955" b="8986">corrects</wd>

<space/>

<wd l="4142" t="8875" r="4238" b="8986">a</wd>

<space/>

<wd l="4421" t="8827" r="4747" b="8986">few</wd>

<space/>

<wd l="4930" t="8827" r="5789" b="8986">real-word</wd>

<space/>

</ln>

<ln l="1421" t="9082" r="5789" b="9283" baseLine="9230" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="9082" r="2515" b="9283">misspellings</wd>

<space/>

<wd l="2602" t="9082" r="3168" b="9240">thanks</wd>

<space/>

<wd l="3254" t="9101" r="3422" b="9240">to</wd>

<space/>

<wd l="3509" t="9082" r="3778" b="9240">the</wd>

<space/>

<wd l="3869" t="9101" r="4560" b="9283">strategy</wd>

<space/>

<wd l="4646" t="9082" r="4848" b="9240">of</wd>

<space/>

<wd l="4910" t="9082" r="5789" b="9283">extracting</wd>

<space/>

</ln>

<ln l="1426" t="9331" r="5784" b="9533" baseLine="9485" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="9331" r="1848" b="9490">from</wd>

<space/>

<wd l="1891" t="9336" r="3043" b="9490">UNITEX-PB</wd>

<space/>

<wd l="3096" t="9331" r="3562" b="9490">those</wd>

<space/>

<wd l="3614" t="9331" r="4512" b="9533">infrequent</wd>

<space/>

<wd l="4560" t="9331" r="5093" b="9490">words</wd>

<space/>

<wd l="5141" t="9331" r="5472" b="9490">that</wd>

<space/>

<wd l="5520" t="9379" r="5784" b="9490">are</wd>

<space/>

</ln>

<ln l="1421" t="9586" r="5789" b="9787" baseLine="9739" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="9586" r="2501" b="9787">homographs</wd>

<space/>

<wd l="2568" t="9586" r="3211" b="9787">(except</wd>

<space/>

<wd l="3264" t="9586" r="3490" b="9787">by</wd>

<space/>

<wd l="3547" t="9586" r="3816" b="9744">the</wd>

<space/>

<wd l="3878" t="9586" r="4747" b="9787">diacritics)</wd>

<space/>

<wd l="4819" t="9586" r="5021" b="9744">of</wd>

<space/>

<wd l="5059" t="9586" r="5789" b="9787">frequent</wd>

<space/>

</ln>

<ln l="1421" t="9840" r="5779" b="10042" baseLine="9989" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="9840" r="2002" b="9998">words.</wd>

<space/>

<wd l="2131" t="9845" r="2986" b="10032">However,</wd>

<space/>

<wd l="3106" t="9888" r="3595" b="10042">many</wd>

<space/>

<wd l="3710" t="9840" r="4574" b="9998">real-word</wd>

<space/>

<wd l="4685" t="9840" r="5779" b="10042">misspellings</wd>

<space/>

</ln>

<ln l="1421" t="10094" r="5794" b="10286" baseLine="10243" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="10094" r="2030" b="10253">remain</wd>

<space/>

<wd l="2174" t="10094" r="3019" b="10286">unsolved,</wd>

<space/>

<wd l="3178" t="10142" r="3350" b="10253">as</wd>

<space/>

<wd l="3499" t="10094" r="3960" b="10253">those</wd>

<space/>

<wd l="4118" t="10094" r="5088" b="10253">corrections</wd>

<space/>

<wd l="5237" t="10094" r="5794" b="10253">would</wd>

<space/>

</ln>

<ln l="1421" t="10344" r="5779" b="10546" baseLine="10498" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="10344" r="2040" b="10546">require</wd>

<space/>

<wd l="2141" t="10344" r="3048" b="10502">contextual</wd>

<space/>

<wd l="3149" t="10344" r="4229" b="10502">information.</wd>

<space/>

<wd l="4334" t="10344" r="4718" b="10502">This</wd>

<space/>

<wd l="4805" t="10344" r="5549" b="10546">problem</wd>

<space/>

<wd l="5640" t="10344" r="5779" b="10502">is</wd>

<space/>

</ln>

<ln l="1421" t="10598" r="5784" b="10800" baseLine="10747" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="10646" r="1867" b="10757">more</wd>

<space/>

<wd l="2011" t="10598" r="2621" b="10757">serious</wd>

<space/>

<wd l="2760" t="10598" r="3235" b="10757">when</wd>

<space/>

<wd l="3370" t="10598" r="3638" b="10757">the</wd>

<space/>

<wd l="3768" t="10598" r="4853" b="10800">homographs</wd>

<space/>

<wd l="4997" t="10646" r="5261" b="10757">are</wd>

<space/>

<wd l="5395" t="10646" r="5784" b="10800">very</wd>

<space/>

</ln>

<ln l="1426" t="10853" r="5779" b="11054" baseLine="11002">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1426" t="10853" r="2150" b="11054">frequent</wd>

<space/>

<wd l="2218" t="10853" r="2808" b="11045">words,</wd>

<space/>

<wd l="2890" t="10853" r="3283" b="11011">such</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="3355" t="10901" r="3528" b="11011">as</wd>

<space/>

<wd l="3605" t="10853" r="4138" b="11011">“esta”</wd>

<space/>

<wd l="4210" t="10853" r="4790" b="11054">(=this)</wd>

<space/>

<wd l="4867" t="10853" r="5184" b="11011">and</wd>

<space/>

<wd l="5251" t="10853" r="5779" b="11011">“está”</wd>

<space/>

</run>

</ln>

<ln l="1426" t="11102" r="5784" b="11304" baseLine="11256" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="11102" r="1882" b="11304">(=is).</wd>

<space/>

<wd l="1963" t="11102" r="2645" b="11261">Besides</wd>

<space/>

<wd l="2722" t="11102" r="3854" b="11304">homographs,</wd>

<space/>

<wd l="3931" t="11150" r="4186" b="11261">we</wd>

<space/>

<wd l="4267" t="11102" r="4613" b="11261">also</wd>

<space/>

<wd l="4690" t="11102" r="5102" b="11261">have</wd>

<space/>

<wd l="5179" t="11122" r="5342" b="11261">to</wd>

<space/>

<wd l="5424" t="11102" r="5784" b="11261">deal</wd>

<space/>

</ln>

<ln l="1421" t="11357" r="5779" b="11558" baseLine="11510" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="11357" r="1810" b="11515">with</wd>

<space/>

<wd l="1939" t="11357" r="2203" b="11515">the</wd>

<space/>

<wd l="2270" t="11357" r="3307" b="11558">homophone</wd>

<space/>

<wd l="3370" t="11357" r="3902" b="11515">words</wd>

<space/>

<wd l="3974" t="11357" r="4507" b="11558">(those</wd>

<space/>

<wd l="4574" t="11357" r="4963" b="11515">with</wd>

<space/>

<wd l="5030" t="11357" r="5779" b="11515">identical</wd>

<space/>

</ln>

<ln l="1416" t="11611" r="5779" b="11813" baseLine="11760" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1416" t="11611" r="2765" b="11813">pronunciation),</wd>

<space/>

<wd l="2837" t="11611" r="3374" b="11770">which</wd>

<space/>

<wd l="3442" t="11611" r="3787" b="11770">also</wd>

<space/>

<wd l="3859" t="11611" r="4762" b="11813">frequently</wd>

<space/>

<wd l="4829" t="11659" r="5318" b="11770">cause</wd>

<space/>

<wd l="5381" t="11611" r="5779" b="11770">real-</wd>

</ln>

<ln l="1421" t="11861" r="5779" b="12062" baseLine="12014">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1421" t="11861" r="1877" b="12019">word</wd>

<space/>

<wd l="2155" t="11861" r="3302" b="12062">misspellings,</wd>

<space/>

<wd l="3600" t="11861" r="3994" b="12019">such</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="4286" t="11909" r="4459" b="12019">as</wd>

<space/>

<wd l="4752" t="11861" r="5779" b="12062">“segmento”</wd>

<space/>

</run>

</ln>

<ln l="1426" t="12115" r="5280" b="12317" baseLine="12269" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="12115" r="2414" b="12317">(=segment)</wd>

<space/>

<wd l="2482" t="12115" r="2803" b="12274">and</wd>

<space/>

<wd l="2856" t="12115" r="4056" b="12317">“seguimento”</wd>

<space/>

<wd l="4118" t="12115" r="4886" b="12317">(=follow</wd>

<space/>

<wd l="4944" t="12115" r="5280" b="12317">up).</wd>

</ln>

</para>

<para l="1402" t="12370" r="5789" b="14342" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="1651" t="12370" r="5779" b="12571" baseLine="12518" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1651" t="12370" r="1987" b="12528">The</wd>

<space/>

<wd l="2045" t="12370" r="3269" b="12528">normalization</wd>

<space/>

<wd l="3326" t="12370" r="3528" b="12528">of</wd>

<space/>

<wd l="3566" t="12418" r="4459" b="12571">acronyms,</wd>

<space/>

<wd l="4531" t="12374" r="5213" b="12528">Internet</wd>

<space/>

<wd l="5280" t="12370" r="5779" b="12571">slang,</wd>

<space/>

</ln>

<ln l="1426" t="12624" r="5779" b="12826" baseLine="12773" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="12624" r="1742" b="12782">and</wd>

<space/>

<wd l="1786" t="12672" r="2362" b="12826">proper</wd>

<space/>

<wd l="2410" t="12672" r="2966" b="12782">names</wd>

<space/>

<wd l="3029" t="12624" r="3163" b="12782">is</wd>

<space/>

<wd l="3221" t="12624" r="4123" b="12826">dependent</wd>

<space/>

<wd l="4176" t="12672" r="4392" b="12782">on</wd>

<space/>

<wd l="4440" t="12624" r="4848" b="12782">their</wd>

<space/>

<wd l="4896" t="12624" r="5779" b="12826">respective</wd>

<space/>

</ln>

<ln l="1426" t="12874" r="5779" b="13075" baseLine="13027" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="12874" r="2198" b="13066">lexicons,</wd>

<space/>

<wd l="2290" t="12874" r="2827" b="13032">which</wd>

<space/>

<wd l="2914" t="12922" r="3173" b="13032">are</wd>

<space/>

<wd l="3259" t="12893" r="3538" b="13032">not</wd>

<space/>

<wd l="3624" t="12874" r="4013" b="13075">only</wd>

<space/>

<wd l="4094" t="12874" r="5779" b="13075">domain-dependent,</wd>

<space/>

</ln>

<ln l="1416" t="13128" r="5779" b="13330" baseLine="13277" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1416" t="13128" r="1704" b="13286">but</wd>

<space/>

<wd l="1766" t="13128" r="2112" b="13286">also</wd>

<space/>

<wd l="2184" t="13128" r="3792" b="13330">corpus-dependent,</wd>

<space/>

<wd l="3864" t="13176" r="4037" b="13286">as</wd>

<space/>

<wd l="4104" t="13176" r="4358" b="13286">we</wd>

<space/>

<wd l="4430" t="13128" r="5222" b="13286">observed</wd>

<space/>

<wd l="5285" t="13128" r="5453" b="13282">in</wd>

<space/>

<wd l="5515" t="13128" r="5779" b="13286">the</wd>

<space/>

</ln>

<ln l="1426" t="13382" r="5789" b="13541" baseLine="13531" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="13382" r="2381" b="13541">evaluation.</wd>

<space/>

<wd l="2486" t="13382" r="2827" b="13541">The</wd>

<space/>

<wd l="2928" t="13382" r="3653" b="13541">lexicons</wd>

<space/>

<wd l="3749" t="13382" r="4162" b="13541">have</wd>

<space/>

<wd l="4253" t="13382" r="4675" b="13541">been</wd>

<space/>

<wd l="4776" t="13382" r="5789" b="13541">constructed</wd>

<space/>

</ln>

<ln l="1421" t="13632" r="5784" b="13834" baseLine="13786" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="13632" r="1810" b="13790">with</wd>

<space/>

<wd l="1949" t="13632" r="2314" b="13790">data</wd>

<space/>

<wd l="2453" t="13632" r="2875" b="13790">from</wd>

<space/>

<wd l="3005" t="13632" r="3274" b="13790">the</wd>

<space/>

<wd l="3413" t="13632" r="4157" b="13834">Buscapé</wd>

<space/>

<wd l="4296" t="13680" r="4877" b="13834">corpus</wd>

<space/>

<wd l="5021" t="13632" r="5338" b="13790">and</wd>

<space/>

<wd l="5467" t="13632" r="5784" b="13790">this</wd>

<space/>

</ln>

<ln l="1402" t="13886" r="5784" b="14088" baseLine="14040" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1402" t="13886" r="2112" b="14088">justifies</wd>

<space/>

<wd l="2165" t="13886" r="2434" b="14045">the</wd>

<space/>

<wd l="2482" t="13886" r="2842" b="14045">best</wd>

<space/>

<wd l="2890" t="13886" r="4003" b="14088">performance</wd>

<space/>

<wd l="4061" t="13886" r="4262" b="14045">of</wd>

<space/>

<wd l="4291" t="13886" r="4555" b="14045">the</wd>

<space/>

<wd l="4608" t="13886" r="5563" b="14045">normalizer</wd>

<space/>

<wd l="5616" t="13886" r="5784" b="14040">in</wd>

<space/>

</ln>

<ln l="1430" t="14141" r="2506" b="14342" baseLine="14290" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1430" t="14141" r="1824" b="14299">such</wd>

<space/>

<wd l="1882" t="14189" r="2506" b="14342">corpus.</wd>

</ln>

</para>

<para l="1421" t="14390" r="5789" b="15312" alignment="justified" fli="216" lsp="exactly" lspExact="250" language="en">

<ln l="1651" t="14390" r="5779" b="14592" baseLine="14544" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1651" t="14390" r="1987" b="14549">The</wd>

<space/>

<wd l="2059" t="14390" r="3278" b="14549">normalization</wd>

<space/>

<wd l="3355" t="14390" r="3557" b="14549">of</wd>

<space/>

<wd l="3595" t="14390" r="4642" b="14592">punctuation</wd>

<space/>

<wd l="4704" t="14438" r="5779" b="14592">presupposes</wd>

<space/>

</ln>

<ln l="1426" t="14645" r="5779" b="14846" baseLine="14798" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="14693" r="1522" b="14803">a</wd>

<space/>

<wd l="1560" t="14645" r="2002" b="14846">plain</wd>

<space/>

<wd l="2050" t="14664" r="2424" b="14803">text.</wd>

<space/>

<wd l="2482" t="14650" r="2789" b="14803">For</wd>

<space/>

<wd l="2832" t="14645" r="3144" b="14803">this</wd>

<space/>

<wd l="3197" t="14693" r="3821" b="14837">reason,</wd>

<space/>

<wd l="3878" t="14693" r="4330" b="14803">some</wd>

<space/>

<wd l="4373" t="14645" r="5054" b="14846">product</wd>

<space/>

<wd l="5102" t="14645" r="5779" b="14803">reviews</wd>

<space/>

</ln>

<ln l="1421" t="14899" r="5779" b="15101" baseLine="15048" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="14899" r="1747" b="15058">that</wd>

<space/>

<wd l="1834" t="14899" r="2438" b="15058">consist</wd>

<space/>

<wd l="2520" t="14899" r="2722" b="15058">of</wd>

<space/>

<wd l="2789" t="14899" r="3360" b="15101">simple</wd>

<space/>

<wd l="3446" t="14899" r="3912" b="15058">items</wd>

<space/>

<wd l="4003" t="14947" r="4186" b="15058">or</wd>

<space/>

<wd l="4258" t="14947" r="4699" b="15058">noun</wd>

<space/>

<wd l="4771" t="14899" r="5429" b="15101">phrases</wd>

<space/>

<wd l="5515" t="14947" r="5779" b="15058">are</wd>

<space/>

</ln>

<ln l="1426" t="15154" r="5789" b="15312" baseLine="15302" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1426" t="15154" r="2126" b="15312">difficult</wd>

<space/>

<wd l="2237" t="15173" r="2405" b="15312">to</wd>

<space/>

<wd l="2515" t="15154" r="3437" b="15312">normalize.</wd>

<space/>

<wd l="3562" t="15154" r="3725" b="15307">If</wd>

<space/>

<wd l="3816" t="15154" r="4214" b="15312">each</wd>

<space/>

<wd l="4325" t="15154" r="4714" b="15312">item</wd>

<space/>

<wd l="4829" t="15173" r="5285" b="15312">starts</wd>

<space/>

<wd l="5395" t="15154" r="5789" b="15312">with</wd>

<space/>

</ln>

</para>

</column>

<column l="6113" t="1417" r="10524" b="15087">

<para l="6125" t="1464" r="10488" b="2678" alignment="justified" spaceBefore="2" lsp="exactly" lspExact="252" language="en">

<ln l="6125" t="1464" r="10488" b="1666" baseLine="1613" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="1512" r="7051" b="1666">uppercase,</wd>

<space/>

<wd l="7118" t="1464" r="7387" b="1622">the</wd>

<space/>

<wd l="7464" t="1483" r="8208" b="1622">sentence</wd>

<space/>

<wd l="8280" t="1464" r="9446" b="1666">segmentation</wd>

<space/>

<wd l="9509" t="1464" r="9845" b="1622">tool</wd>

<space/>

<wd l="9917" t="1464" r="10488" b="1622">inserts</wd>

<space/>

</ln>

<ln l="6130" t="1718" r="10488" b="1920" baseLine="1867" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="1766" r="6226" b="1877">a</wd>

<space/>

<wd l="6302" t="1718" r="6874" b="1920">period</wd>

<space/>

<wd l="6955" t="1718" r="7354" b="1877">after</wd>

<space/>

<wd l="7440" t="1718" r="7838" b="1877">each</wd>

<space/>

<wd l="7925" t="1718" r="8352" b="1877">item.</wd>

<space/>

<wd l="8453" t="1718" r="9499" b="1920">Conversely,</wd>

<space/>

<wd l="9590" t="1718" r="9744" b="1872">if</wd>

<space/>

<wd l="9806" t="1766" r="10013" b="1877">an</wd>

<space/>

<wd l="10099" t="1718" r="10488" b="1877">item</wd>

<space/>

</ln>

<ln l="6134" t="1968" r="10488" b="2126" baseLine="2122" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="1987" r="6586" b="2126">starts</wd>

<space/>

<wd l="6638" t="1968" r="6806" b="2122">in</wd>

<space/>

<wd l="6854" t="1968" r="7354" b="2126">lower</wd>

<space/>

<wd l="7402" t="2016" r="7776" b="2126">case</wd>

<space/>

<wd l="7829" t="1968" r="8146" b="2126">and</wd>

<space/>

<wd l="8184" t="1968" r="8621" b="2126">there</wd>

<space/>

<wd l="8674" t="1968" r="8813" b="2126">is</wd>

<space/>

<wd l="8866" t="1968" r="9523" b="2126">another</wd>

<space/>

<wd l="9571" t="1968" r="9960" b="2126">item</wd>

<space/>

<wd l="10003" t="1968" r="10171" b="2122">in</wd>

<space/>

<wd l="10219" t="1968" r="10488" b="2126">the</wd>

<space/>

</ln>

<ln l="6134" t="2222" r="10488" b="2424" baseLine="2376" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="2270" r="6979" b="2424">sequence,</wd>

<space/>

<wd l="7032" t="2222" r="7301" b="2381">the</wd>

<space/>

<wd l="7358" t="2242" r="8102" b="2381">sentence</wd>

<space/>

<wd l="8160" t="2222" r="9326" b="2424">segmentation</wd>

<space/>

<wd l="9370" t="2222" r="9710" b="2381">tool</wd>

<space/>

<wd l="9763" t="2222" r="10157" b="2381">does</wd>

<space/>

<wd l="10210" t="2242" r="10488" b="2381">not</wd>

<space/>

</ln>

<ln l="6130" t="2477" r="7358" b="2678" baseLine="2626" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="2477" r="6614" b="2635">insert</wd>

<space/>

<wd l="6662" t="2477" r="7358" b="2678">periods.</wd>

</ln>

</para>

<para l="6125" t="2726" r="10488" b="4195" alignment="justified" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="6350" t="2726" r="10488" b="2928" baseLine="2880" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="2726" r="7075" b="2885">Another</wd>

<space/>

<wd l="7210" t="2726" r="7949" b="2928">problem</wd>

<space/>

<wd l="8088" t="2726" r="8424" b="2885">that</wd>

<space/>

<wd l="8563" t="2726" r="9259" b="2885">remains</wd>

<space/>

<wd l="9408" t="2726" r="10205" b="2885">unsolved</wd>

<space/>

<wd l="10349" t="2726" r="10488" b="2885">is</wd>

<space/>

</ln>

<ln l="6125" t="2981" r="10478" b="3182" baseLine="3134" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="2981" r="6730" b="3139">related</wd>

<space/>

<wd l="6816" t="3000" r="6984" b="3139">to</wd>

<space/>

<wd l="7085" t="3029" r="7843" b="3139">common</wd>

<space/>

<wd l="7939" t="2981" r="8472" b="3139">words</wd>

<space/>

<wd l="8573" t="2981" r="9197" b="3139">written</wd>

<space/>

<wd l="9293" t="2981" r="9461" b="3134">in</wd>

<space/>

<wd l="9552" t="3029" r="10478" b="3182">uppercase.</wd>

<space/>

</ln>

<ln l="6125" t="3235" r="10488" b="3437" baseLine="3384" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="3240" r="6427" b="3394">We</wd>

<space/>

<wd l="6485" t="3235" r="6869" b="3437">only</wd>

<space/>

<wd l="6917" t="3254" r="7574" b="3394">convert</wd>

<space/>

<wd l="7622" t="3283" r="8501" b="3437">uppercase</wd>

<space/>

<wd l="8549" t="3254" r="8717" b="3394">to</wd>

<space/>

<wd l="8774" t="3235" r="9648" b="3394">lowercase</wd>

<space/>

<wd l="9696" t="3235" r="10171" b="3394">when</wd>

<space/>

<wd l="10219" t="3235" r="10488" b="3394">the</wd>

<space/>

</ln>

<ln l="6125" t="3490" r="10483" b="3691" baseLine="3638" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="3490" r="6662" b="3648">whole</wd>

<space/>

<wd l="6787" t="3490" r="7387" b="3648">review</wd>

<space/>

<wd l="7517" t="3490" r="7656" b="3648">is</wd>

<space/>

<wd l="7790" t="3490" r="7958" b="3643">in</wd>

<space/>

<wd l="8083" t="3538" r="9005" b="3691">uppercase.</wd>

<space/>

<wd l="9149" t="3490" r="10099" b="3682">Otherwise,</wd>

<space/>

<wd l="10229" t="3538" r="10483" b="3648">we</wd>

<space/>

</ln>

<ln l="6125" t="3739" r="10483" b="3941" baseLine="3893" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="3739" r="6893" b="3898">maintain</wd>

<space/>

<wd l="6936" t="3739" r="7205" b="3898">the</wd>

<space/>

<wd l="7243" t="3787" r="8174" b="3941">uppercase,</wd>

<space/>

<wd l="8218" t="3739" r="8914" b="3898">because</wd>

<space/>

<wd l="8962" t="3739" r="9082" b="3898">it</wd>

<space/>

<wd l="9120" t="3787" r="9499" b="3941">may</wd>

<space/>

<wd l="9542" t="3739" r="10238" b="3898">indicate</wd>

<space/>

<wd l="10286" t="3787" r="10483" b="3898">an</wd>

<space/>

</ln>

<ln l="6130" t="3994" r="8453" b="4195" baseLine="4142" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="4042" r="6898" b="4195">acronym</wd>

<space/>

<wd l="6950" t="4042" r="7133" b="4152">or</wd>

<space/>

<wd l="7190" t="4042" r="7286" b="4152">a</wd>

<space/>

<wd l="7334" t="4042" r="7915" b="4195">proper</wd>

<space/>

<wd l="7968" t="4042" r="8453" b="4152">noun.</wd>

</ln>

</para>

<para l="6130" t="4459" r="9686" b="4627" alignment="left" spaceBefore="207" lsp="exactly" lspExact="274" language="en">

<ln l="6130" t="4459" r="9686" b="4627" baseLine="4618" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="3">

<wd l="6130" t="4459" r="6298" b="4627">6.</wd>

<space/>

<wd l="6490" t="4459" r="7018" b="4627">Final</wd>

<space/>

<wd l="7080" t="4459" r="7934" b="4627">remarks</wd>

<space/>

<wd l="8011" t="4459" r="8386" b="4627">and</wd>

<space/>

<wd l="8453" t="4459" r="9082" b="4627">future</wd>

<space/>

<wd l="9144" t="4459" r="9686" b="4627">work</wd>

</ln>

</para>

<para l="6125" t="4882" r="10493" b="6350" alignment="justified" spaceBefore="157" lsp="exactly" lspExact="253" language="en">

<ln l="6130" t="4882" r="10488" b="5083" baseLine="5035" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="4882" r="6466" b="5040">The</wd>

<space/>

<wd l="6581" t="4882" r="7709" b="5040">UGCNormal</wd>

<space/>

<wd l="7824" t="4882" r="8942" b="5083">performance</wd>

<space/>

<wd l="9058" t="4930" r="9624" b="5083">ranges</wd>

<space/>

<wd l="9744" t="4882" r="10171" b="5040">from</wd>

<space/>

<wd l="10282" t="4930" r="10488" b="5040">an</wd>

<space/>

</ln>

<ln l="6130" t="5136" r="10493" b="5338" baseLine="5285" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="5184" r="6806" b="5338">average</wd>

<space/>

<wd l="6907" t="5136" r="7109" b="5294">of</wd>

<space/>

<wd l="7186" t="5136" r="7579" b="5299">25%</wd>

<space/>

<wd l="7685" t="5136" r="8016" b="5338">(for</wd>

<space/>

<wd l="8112" t="5136" r="8597" b="5338">glued</wd>

<space/>

<wd l="8688" t="5136" r="9293" b="5338">words)</wd>

<space/>

<wd l="9394" t="5155" r="9562" b="5294">to</wd>

<space/>

<wd l="9672" t="5136" r="10061" b="5299">84%</wd>

<space/>

<wd l="10166" t="5136" r="10493" b="5338">(for</wd>

<space/>

</ln>

<ln l="6130" t="5390" r="10483" b="5592" baseLine="5539" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="5438" r="6893" b="5549">common</wd>

<space/>

<wd l="6950" t="5390" r="8170" b="5592">misspellings).</wd>

<space/>

<wd l="8242" t="5390" r="8578" b="5549">The</wd>

<space/>

<wd l="8640" t="5390" r="9523" b="5549">validation</wd>

<space/>

<wd l="9581" t="5390" r="9782" b="5549">of</wd>

<space/>

<wd l="9816" t="5390" r="10085" b="5549">the</wd>

<space/>

<wd l="10147" t="5390" r="10483" b="5549">tool</wd>

<space/>

</ln>

<ln l="6134" t="5640" r="10493" b="5842" baseLine="5794" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="5640" r="6672" b="5798">shows</wd>

<space/>

<wd l="6758" t="5640" r="7090" b="5798">that</wd>

<space/>

<wd l="7176" t="5640" r="7445" b="5798">the</wd>

<space/>

<wd l="7531" t="5640" r="8102" b="5798">results</wd>

<space/>

<wd l="8194" t="5640" r="8395" b="5798">of</wd>

<space/>

<wd l="8453" t="5640" r="8851" b="5798">both</wd>

<space/>

<wd l="8938" t="5640" r="9331" b="5798">POS</wd>

<space/>

<wd l="9422" t="5640" r="10085" b="5842">tagging</wd>

<space/>

<wd l="10171" t="5640" r="10493" b="5798">and</wd>

<space/>

</ln>

<ln l="6130" t="5894" r="10493" b="6096" baseLine="6048" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="5894" r="6797" b="6096">opinion</wd>

<space/>

<wd l="6883" t="5894" r="8040" b="6053">classification</wd>

<space/>

<wd l="8122" t="5894" r="8558" b="6053">tasks</wd>

<space/>

<wd l="8650" t="5894" r="9494" b="6096">improved</wd>

<space/>

<wd l="9576" t="5894" r="10190" b="6053">around</wd>

<space/>

<wd l="10267" t="5894" r="10493" b="6096">by</wd>

<space/>

</ln>

<ln l="6125" t="6149" r="9835" b="6350" baseLine="6298" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="6168" r="6451" b="6307">two</wd>

<space/>

<wd l="6504" t="6168" r="7459" b="6350">percentage</wd>

<space/>

<wd l="7512" t="6149" r="8050" b="6350">points</wd>

<space/>

<wd l="8112" t="6149" r="8515" b="6307">after</wd>

<space/>

<wd l="8568" t="6149" r="9835" b="6307">normalization.</wd>

</ln>

</para>

<para l="6120" t="6398" r="10493" b="9384" alignment="justified" spaceBefore="2" fli="216" lsp="exactly" lspExact="252" language="en">

<ln l="6350" t="6398" r="10493" b="6600" baseLine="6552" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6350" t="6398" r="7186" b="6600">Although</wd>

<space/>

<wd l="7243" t="6398" r="7680" b="6557">there</wd>

<space/>

<wd l="7742" t="6398" r="7882" b="6557">is</wd>

<space/>

<wd l="7944" t="6446" r="8160" b="6557">no</wd>

<space/>

<wd l="8222" t="6398" r="9211" b="6600">all-purpose</wd>

<space/>

<wd l="9269" t="6398" r="10493" b="6557">normalization</wd>

<space/>

</ln>

<ln l="6120" t="6653" r="10488" b="6854" baseLine="6806" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6120" t="6701" r="6835" b="6854">process,</wd>

<space/>

<wd l="6902" t="6653" r="7022" b="6811">it</wd>

<space/>

<wd l="7085" t="6653" r="7219" b="6811">is</wd>

<space/>

<wd l="7282" t="6653" r="8006" b="6854">possible</wd>

<space/>

<wd l="8064" t="6672" r="8232" b="6811">to</wd>

<space/>

<wd l="8294" t="6701" r="8755" b="6811">reuse</wd>

<space/>

<wd l="8827" t="6701" r="9278" b="6811">some</wd>

<space/>

<wd l="9341" t="6653" r="10080" b="6811">modules</wd>

<space/>

<wd l="10152" t="6653" r="10349" b="6811">of</wd>

<space/>

<wd l="10392" t="6701" r="10488" b="6811">a</wd>

<space/>

</ln>

<ln l="6125" t="6907" r="10488" b="7109" baseLine="7056" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="6907" r="7344" b="7066">normalization</wd>

<space/>

<wd l="7656" t="6907" r="8419" b="7109">pipeline,</wd>

<space/>

<wd l="8746" t="6907" r="9734" b="7109">assembling</wd>

<space/>

<wd l="10046" t="6907" r="10488" b="7066">them</wd>

<space/>

</ln>

<ln l="6130" t="7162" r="10488" b="7363" baseLine="7310" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="7162" r="7056" b="7363">differently</wd>

<space/>

<wd l="7133" t="7162" r="7301" b="7315">in</wd>

<space/>

<wd l="7387" t="7162" r="7848" b="7320">order</wd>

<space/>

<wd l="7925" t="7181" r="8093" b="7320">to</wd>

<space/>

<wd l="8189" t="7162" r="8496" b="7320">suit</wd>

<space/>

<wd l="8578" t="7162" r="9240" b="7320">another</wd>

<space/>

<wd l="9312" t="7210" r="10051" b="7363">purpose.</wd>

<space/>

<wd l="10147" t="7162" r="10488" b="7320">The</wd>

<space/>

</ln>

<ln l="6120" t="7411" r="10488" b="7613" baseLine="7565" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6120" t="7411" r="6936" b="7613">proposed</wd>

<space/>

<wd l="7066" t="7411" r="8290" b="7570">normalization</wd>

<space/>

<wd l="8424" t="7411" r="8760" b="7570">tool</wd>

<space/>

<wd l="8899" t="7411" r="9240" b="7570">will</wd>

<space/>

<wd l="9384" t="7411" r="10147" b="7613">certainly</wd>

<space/>

<wd l="10277" t="7411" r="10488" b="7570">be</wd>

<space/>

</ln>

<ln l="6125" t="7666" r="10493" b="7867" baseLine="7814" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="7666" r="6658" b="7824">useful</wd>

<space/>

<wd l="6715" t="7666" r="6974" b="7824">for</wd>

<space/>

<wd l="7018" t="7666" r="7286" b="7824">the</wd>

<space/>

<wd l="7339" t="7666" r="8472" b="7867">development</wd>

<space/>

<wd l="8525" t="7666" r="8726" b="7824">of</wd>

<space/>

<wd l="8755" t="7666" r="9216" b="7824">UGC</wd>

<space/>

<wd l="9269" t="7666" r="10493" b="7824">normalization</wd>

<space/>

</ln>

<ln l="6125" t="7920" r="10483" b="8122" baseLine="8069" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="7920" r="6547" b="8078">tools</wd>

<space/>

<wd l="6898" t="7920" r="7229" b="8078">that</wd>

<space/>

<wd l="7574" t="7968" r="8525" b="8122">encompass</wd>

<space/>

<wd l="8885" t="7920" r="9317" b="8078">short</wd>

<space/>

<wd l="9658" t="7968" r="10483" b="8122">messages</wd>

<space/>

</ln>

<ln l="6125" t="8170" r="10488" b="8328" baseLine="8323" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="8170" r="7387" b="8328">normalization.</wd>

<space/>

<wd l="7507" t="8174" r="7680" b="8323">In</wd>

<space/>

<wd l="7786" t="8170" r="8246" b="8328">order</wd>

<space/>

<wd l="8342" t="8189" r="8510" b="8328">to</wd>

<space/>

<wd l="8606" t="8170" r="8818" b="8328">be</wd>

<space/>

<wd l="8923" t="8170" r="9595" b="8328">suitable</wd>

<space/>

<wd l="9701" t="8170" r="9955" b="8328">for</wd>

<space/>

<wd l="10056" t="8170" r="10488" b="8328">short</wd>

<space/>

</ln>

<ln l="6125" t="8424" r="10483" b="8626" baseLine="8578" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="8472" r="6946" b="8626">messages</wd>

<space/>

<wd l="6998" t="8424" r="8270" b="8616">normalization,</wd>

<space/>

<wd l="8318" t="8424" r="8630" b="8582">this</wd>

<space/>

<wd l="8683" t="8424" r="9019" b="8582">tool</wd>

<space/>

<wd l="9067" t="8424" r="9566" b="8582">needs</wd>

<space/>

<wd l="9614" t="8443" r="9782" b="8582">to</wd>

<space/>

<wd l="9835" t="8424" r="10483" b="8582">address</wd>

<space/>

</ln>

<ln l="6134" t="8678" r="10493" b="8880" baseLine="8827" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6134" t="8726" r="6586" b="8837">some</wd>

<space/>

<wd l="6648" t="8678" r="7464" b="8880">problems</wd>

<space/>

<wd l="7536" t="8678" r="8141" b="8837">related</wd>

<space/>

<wd l="8198" t="8698" r="8362" b="8837">to</wd>

<space/>

<wd l="8429" t="8678" r="8885" b="8837">word</wd>

<space/>

<wd l="8952" t="8678" r="10109" b="8880">agglutination</wd>

<space/>

<wd l="10176" t="8678" r="10493" b="8837">and</wd>

<space/>

</ln>

<ln l="6130" t="8928" r="10488" b="9086" baseLine="9082" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6130" t="8928" r="6874" b="9086">informal</wd>

<space/>

<wd l="7066" t="8928" r="8242" b="9086">abbreviations</wd>

<space/>

<wd l="8434" t="8928" r="8635" b="9086">of</wd>

<space/>

<wd l="8794" t="8976" r="9317" b="9086">nouns</wd>

<space/>

<wd l="9504" t="8928" r="9893" b="9086">with</wd>

<space/>

<wd l="10080" t="8947" r="10488" b="9086">stem</wd>

<space/>

</ln>

<ln l="6120" t="9182" r="7253" b="9384" baseLine="9336" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6120" t="9182" r="7253" b="9384">preservation.</wd>

</ln>

</para>

<para l="6120" t="9437" r="10498" b="12672" alignment="justified" fli="216" lsp="exactly" lspExact="253" language="en">

<ln l="6355" t="9437" r="10498" b="9638" baseLine="9586" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="9437" r="6739" b="9595">This</wd>

<space/>

<wd l="6792" t="9437" r="7747" b="9595">normalizer</wd>

<space/>

<wd l="7795" t="9437" r="8496" b="9595">evolved</wd>

<space/>

<wd l="8544" t="9437" r="8966" b="9595">from</wd>

<space/>

<wd l="9014" t="9485" r="9110" b="9595">a</wd>

<space/>

<wd l="9149" t="9437" r="10498" b="9638">phonetic-based</wd>

<space/>

</ln>

<ln l="6134" t="9691" r="10483" b="9893" baseLine="9840" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6134" t="9691" r="6715" b="9893">speller</wd>

<space/>

<wd l="6778" t="9691" r="7315" b="9850">aimed</wd>

<space/>

<wd l="7382" t="9710" r="7541" b="9850">at</wd>

<space/>

<wd l="7603" t="9691" r="8314" b="9893">tackling</wd>

<space/>

<wd l="8381" t="9739" r="9144" b="9850">common</wd>

<space/>

<wd l="9211" t="9739" r="9720" b="9850">errors</wd>

<space/>

<wd l="9792" t="9691" r="9960" b="9845">in</wd>

<space/>

<wd l="10022" t="9691" r="10483" b="9850">UGC</wd>

<space/>

</ln>

<ln l="6130" t="9941" r="10488" b="10142" baseLine="10094" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="9941" r="6734" b="10142">(words</wd>

<space/>

<wd l="6869" t="9941" r="7488" b="10099">written</wd>

<space/>

<wd l="7627" t="9989" r="7800" b="10099">as</wd>

<space/>

<wd l="7934" t="9941" r="8318" b="10142">they</wd>

<space/>

<wd l="8453" t="9989" r="8712" b="10099">are</wd>

<space/>

<wd l="8842" t="9941" r="10003" b="10142">pronounced).</wd>

<space/>

<wd l="10147" t="9941" r="10488" b="10099">Our</wd>

<space/>

</ln>

<ln l="6130" t="10195" r="10483" b="10397" baseLine="10344" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="10195" r="6931" b="10397">approach</wd>

<space/>

<wd l="7138" t="10195" r="7277" b="10354">is</wd>

<space/>

<wd l="7488" t="10195" r="8170" b="10397">largerly</wd>

<space/>

<wd l="8376" t="10195" r="9274" b="10397">dependent</wd>

<space/>

<wd l="9485" t="10243" r="9701" b="10354">on</wd>

<space/>

<wd l="9907" t="10195" r="10483" b="10354">lexical</wd>

<space/>

</ln>

<ln l="6125" t="10450" r="10483" b="10651" baseLine="10598" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="10498" r="7003" b="10642">resources,</wd>

<space/>

<wd l="7094" t="10450" r="7896" b="10651">incurring</wd>

<space/>

<wd l="7982" t="10498" r="8078" b="10608">a</wd>

<space/>

<wd l="8155" t="10450" r="8544" b="10651">high</wd>

<space/>

<wd l="8626" t="10450" r="9739" b="10608">maintenance</wd>

<space/>

<wd l="9821" t="10469" r="10214" b="10608">cost.</wd>

<space/>

<wd l="10310" t="10454" r="10483" b="10603">In</wd>

<space/>

</ln>

<ln l="6130" t="10699" r="10488" b="10901" baseLine="10853" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="10699" r="6893" b="10891">addition,</wd>

<space/>

<wd l="6989" t="10699" r="7306" b="10858">this</wd>

<space/>

<wd l="7402" t="10699" r="8357" b="10858">normalizer</wd>

<space/>

<wd l="8448" t="10699" r="8842" b="10858">does</wd>

<space/>

<wd l="8938" t="10718" r="9221" b="10858">not</wd>

<space/>

<wd l="9307" t="10699" r="10022" b="10901">perform</wd>

<space/>

<wd l="10109" t="10699" r="10488" b="10858">well</wd>

<space/>

</ln>

<ln l="6125" t="10954" r="10488" b="11112" baseLine="11107" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="10954" r="6518" b="11112">with</wd>

<space/>

<wd l="6605" t="10954" r="7464" b="11112">real-word</wd>

<space/>

<wd l="7555" t="11002" r="8102" b="11112">errors.</wd>

<space/>

<wd l="8203" t="10958" r="8510" b="11112">We</wd>

<space/>

<wd l="8592" t="10954" r="9230" b="11112">believe</wd>

<space/>

<wd l="9322" t="10954" r="9653" b="11112">that</wd>

<space/>

<wd l="9744" t="10954" r="10488" b="11112">machine</wd>

<space/>

</ln>

<ln l="6130" t="11208" r="10483" b="11410" baseLine="11357" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="11208" r="6850" b="11410">learning</wd>

<space/>

<wd l="6931" t="11208" r="7915" b="11410">approaches</wd>

<space/>

<wd l="8002" t="11208" r="8338" b="11366">will</wd>

<space/>

<wd l="8434" t="11208" r="9000" b="11366">enable</wd>

<space/>

<wd l="9086" t="11256" r="9278" b="11366">us</wd>

<space/>

<wd l="9365" t="11227" r="9533" b="11366">to</wd>

<space/>

<wd l="9624" t="11256" r="10483" b="11366">overcome</wd>

<space/>

</ln>

<ln l="6125" t="11458" r="10483" b="11659" baseLine="11611" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="11458" r="6576" b="11616">these</wd>

<space/>

<wd l="6638" t="11458" r="7853" b="11659">shortcomings.</wd>

<space/>

<wd l="7920" t="11462" r="8222" b="11616">We</wd>

<space/>

<wd l="8280" t="11458" r="8741" b="11650">have,</wd>

<space/>

<wd l="8803" t="11458" r="9437" b="11650">indeed,</wd>

<space/>

<wd l="9494" t="11458" r="9970" b="11616">made</wd>

<space/>

<wd l="10032" t="11506" r="10483" b="11616">some</wd>

<space/>

</ln>

<ln l="6120" t="11712" r="10483" b="11914" baseLine="11866" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="11712" r="7152" b="11914">preliminary</wd>

<space/>

<wd l="7248" t="11712" r="8318" b="11914">experiments</wd>

<space/>

<wd l="8419" t="11712" r="8813" b="11870">with</wd>

<space/>

<wd l="8914" t="11712" r="9696" b="11914">language</wd>

<space/>

<wd l="9797" t="11712" r="10483" b="11904">models,</wd>

<space/>

</ln>

<ln l="6120" t="11966" r="10483" b="12168" baseLine="12115" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="11966" r="6408" b="12125">but</wd>

<space/>

<wd l="6485" t="11966" r="6754" b="12125">the</wd>

<space/>

<wd l="6835" t="11966" r="7219" b="12168">high</wd>

<space/>

<wd l="7306" t="12014" r="8266" b="12125">occurrence</wd>

<space/>

<wd l="8352" t="11966" r="8549" b="12125">of</wd>

<space/>

<wd l="8611" t="11966" r="9019" b="12125">false</wd>

<space/>

<wd l="9096" t="11966" r="9878" b="12168">positives</wd>

<space/>

<wd l="9965" t="11966" r="10483" b="12168">(well-</wd>

</ln>

<ln l="6125" t="12221" r="10493" b="12422" baseLine="12370" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="12221" r="6749" b="12379">written</wd>

<space/>

<wd l="6850" t="12221" r="7382" b="12379">words</wd>

<space/>

<wd l="7488" t="12221" r="8218" b="12422">wrongly</wd>

<space/>

<wd l="8323" t="12221" r="9202" b="12422">corrected)</wd>

<space/>

<wd l="9312" t="12221" r="10003" b="12379">remains</wd>

<space/>

<wd l="10114" t="12269" r="10286" b="12379">as</wd>

<space/>

<wd l="10397" t="12269" r="10493" b="12379">a</wd>

<space/>

</ln>

<ln l="6130" t="12470" r="7013" b="12672" baseLine="12624" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="12470" r="7013" b="12672">challenge.</wd>

</ln>

</para>

<para l="6125" t="13229" r="8102" b="13445" alignment="left" spaceBefore="499" lsp="exactly" lspExact="274" language="en">

<ln l="6125" t="13229" r="8102" b="13445" baseLine="13392" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="13229" r="8102" b="13445">Acknowledgements</wd>

</ln>

</para>

<para l="6125" t="13613" r="10507" b="15038" alignment="justified" spaceBefore="114" lsp="exactly" lspExact="251" language="en">

<ln l="6125" t="13613" r="10483" b="13814" baseLine="13766" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="13618" r="6480" b="13771">Part</wd>

<space/>

<wd l="6576" t="13613" r="6773" b="13771">of</wd>

<space/>

<wd l="6845" t="13613" r="7114" b="13771">the</wd>

<space/>

<wd l="7205" t="13613" r="7776" b="13771">results</wd>

<space/>

<wd l="7867" t="13613" r="8722" b="13814">presented</wd>

<space/>

<wd l="8813" t="13613" r="8981" b="13766">in</wd>

<space/>

<wd l="9072" t="13613" r="9384" b="13771">this</wd>

<space/>

<wd l="9475" t="13661" r="9970" b="13814">paper</wd>

<space/>

<wd l="10061" t="13661" r="10483" b="13771">were</wd>

<space/>

</ln>

<ln l="6130" t="13867" r="10488" b="14069" baseLine="14016" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="13867" r="6888" b="14026">obtained</wd>

<space/>

<wd l="6960" t="13867" r="7646" b="14069">through</wd>

<space/>

<wd l="7723" t="13867" r="8458" b="14026">research</wd>

<space/>

<wd l="8539" t="13867" r="9197" b="14069">activity</wd>

<space/>

<wd l="9278" t="13867" r="9446" b="14021">in</wd>

<space/>

<wd l="9523" t="13867" r="9792" b="14026">the</wd>

<space/>

<wd l="9869" t="13867" r="10488" b="14069">project</wd>

<space/>

</ln>

<ln l="6130" t="14122" r="10488" b="14323" baseLine="14270">

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6130" t="14122" r="6787" b="14280">entitled</wd>

<space/>

</run>

<wd l="6989" t="14122" r="7891" b="14280"><run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">“</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Semantic</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="8098" t="14122" r="9053" b="14323">Processing</wd>

<space/>

<wd l="9254" t="14122" r="9456" b="14280">of</wd>

<space/>

<wd l="9634" t="14126" r="10114" b="14280">Texts</wd>

<space/>

<wd l="10320" t="14122" r="10488" b="14275">in</wd>

<space/>

</run>

</ln>

<ln l="6125" t="14371" r="10488" b="14573" baseLine="14525" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="14371" r="6926" b="14530">Brazilian</wd>

<space/>

<wd l="7061" t="14371" r="8179" b="14573">Portuguese&quot;,</wd>

<space/>

<wd l="8323" t="14371" r="9206" b="14573">sponsored</wd>

<space/>

<wd l="9331" t="14371" r="9557" b="14573">by</wd>

<space/>

<wd l="9691" t="14371" r="10488" b="14573">Samsung</wd>

<space/>

</ln>

<ln l="6125" t="14626" r="10507" b="14784" baseLine="14779" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="14626" r="7032" b="14784">Eletrônica</wd>

<space/>

<wd l="7094" t="14626" r="7301" b="14784">da</wd>

<space/>

<wd l="7363" t="14626" r="8270" b="14784">Amazônia</wd>

<space/>

<wd l="8333" t="14626" r="8779" b="14784">Ltda.</wd>

<space/>

<wd l="8856" t="14626" r="9360" b="14784">under</wd>

<space/>

<wd l="9418" t="14626" r="9686" b="14784">the</wd>

<space/>

<wd l="9754" t="14645" r="10234" b="14784">terms</wd>

<space/>

<wd l="10306" t="14626" r="10507" b="14784">of</wd>

<space/>

</ln>

<ln l="6125" t="14880" r="9566" b="15038" baseLine="15029" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="14880" r="6926" b="15038">Brazilian</wd>

<space/>

<wd l="6989" t="14880" r="7589" b="15038">federal</wd>

<space/>

<wd l="7656" t="14880" r="7968" b="15038">law</wd>

<space/>

<wd l="8026" t="14880" r="8698" b="15038">number</wd>

<space/>

<wd l="8760" t="14880" r="9566" b="15038">8.248/91.</wd>

</ln>

</para>

</column>

</section>

<dd l="1402" t="15736" r="10524" b="15977">

<para l="5800" t="15787" r="6138" b="15946" alignment="centered" spaceBefore="4" lsp="exactly" lspExact="229" language="en">

<ln l="5866" t="15787" r="6072" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="39">

<wd l="5866" t="15787" r="6072" b="15946">45</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4305.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1409" marginTop="1416" marginRight="1378" marginBottom="1302" offsetX="-18" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1409" t="1416" r="10531" b="15378">

<column l="1409" t="1416" r="5820" b="15378">

<para l="1421" t="1474" r="2525" b="1642" alignment="left" spaceBefore="5" lsp="exactly" lspExact="273" language="en">

<ln l="1421" t="1474" r="2525" b="1642" baseLine="1632" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="1474" r="2525" b="1642">References</wd>

</ln>

</para>

<para l="1421" t="1853" r="5779" b="3187" alignment="justified" li="216" spaceBefore="103" fli="-216" lsp="exactly" lspExact="233" language="en">

<ln l="1421" t="1853" r="5765" b="2026" baseLine="1992" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="1853" r="2050" b="2026">Aluísio,</wd>

<space/>

<wd l="2194" t="1853" r="2333" b="1997">S.</wd>

<space/>

<wd l="2472" t="1858" r="2741" b="2026">M.;</wd>

<space/>

<wd l="2880" t="1853" r="3667" b="2026">Pelizzoni,</wd>

<space/>

<wd l="3802" t="1858" r="3922" b="1997">J.</wd>

<space/>

<wd l="4061" t="1858" r="4330" b="2026">M.;</wd>

<space/>

<wd l="4469" t="1853" r="5088" b="2026">Marchi,</wd>

<space/>

<wd l="5222" t="1853" r="5400" b="1997">A.</wd>

<space/>

<wd l="5539" t="1858" r="5765" b="2026">R.;</wd>

<space/>

</ln>

<ln l="1651" t="2083" r="5770" b="2266" baseLine="2222" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="2083" r="2342" b="2256">Oliveira,</wd>

<space/>

<wd l="2458" t="2088" r="2616" b="2227">L.</wd>

<space/>

<wd l="2731" t="2088" r="2971" b="2256">H.;</wd>

<space/>

<wd l="3086" t="2083" r="3797" b="2256">Manenti,</wd>

<space/>

<wd l="3912" t="2088" r="4138" b="2256">R.;</wd>

<space/>

<wd l="4253" t="2083" r="5472" b="2266">Marquivafável,</wd>

<space/>

<wd l="5587" t="2088" r="5770" b="2227">V.</wd>

<space/>

</ln>

<ln l="1651" t="2314" r="5779" b="2496" baseLine="2453" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="2314" r="2218" b="2496">(2003).</wd>

<space/>

<wd l="2314" t="2314" r="2558" b="2453">An</wd>

<space/>

<wd l="2640" t="2333" r="3259" b="2458">account</wd>

<space/>

<wd l="3346" t="2314" r="3533" b="2458">of</wd>

<space/>

<wd l="3586" t="2314" r="3830" b="2458">the</wd>

<space/>

<wd l="3917" t="2314" r="4675" b="2496">challenge</wd>

<space/>

<wd l="4766" t="2314" r="4954" b="2458">of</wd>

<space/>

<wd l="5006" t="2314" r="5606" b="2496">tagging</wd>

<space/>

<wd l="5693" t="2357" r="5779" b="2458">a</wd>

<space/>

</ln>

<ln l="1646" t="2544" r="5765" b="2726" baseLine="2683" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="2544" r="2386" b="2688">reference</wd>

<space/>

<wd l="2554" t="2587" r="3077" b="2726">corpus</wd>

<space/>

<wd l="3245" t="2544" r="3432" b="2688">of</wd>

<space/>

<wd l="3566" t="2544" r="4296" b="2688">Brazilian</wd>

<space/>

<wd l="4459" t="2549" r="5386" b="2726">Portuguese.</wd>

<space/>

<wd l="5563" t="2549" r="5765" b="2688">In:</wd>

<space/>

</ln>

<ln l="1642" t="2774" r="5774" b="2957" baseLine="2914">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1642" t="2774" r="2645" b="2957">Proceedings</wd>

<space/>

<wd l="2750" t="2774" r="2942" b="2957">of</wd>

<space/>

</run>

<wd l="3000" t="2774" r="4291" b="2918"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">PROPOR´2003</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="4411" t="2774" r="5093" b="2957">Springer</wd>

<space/>

<wd l="5189" t="2774" r="5774" b="2957">Verlag,</wd>

<space/>

</run>

</ln>

<ln l="1646" t="3005" r="3154" b="3187" baseLine="3144" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="3005" r="2093" b="3178">2003,</wd>

<space/>

<wd l="2141" t="3048" r="2386" b="3187">pp.</wd>

<space/>

<wd l="2467" t="3005" r="3154" b="3149">110-117.</wd>

</ln>

</para>

<para l="1421" t="3293" r="5794" b="4166" alignment="justified" li="216" spaceBefore="53" fli="-216" lsp="exactly" lspExact="232" language="en">

<ln l="1421" t="3293" r="5779" b="3475" baseLine="3432" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1421" t="3293" r="2083" b="3470">Avanço,</wd>

<space/>

<wd l="2146" t="3298" r="2304" b="3437">L.</wd>

<space/>

<wd l="2376" t="3298" r="2611" b="3466">V.,</wd>

<space/>

<wd l="2678" t="3298" r="3216" b="3466">Duran,</wd>

<space/>

<wd l="3278" t="3298" r="3494" b="3437">M.</wd>

<space/>

<wd l="3576" t="3293" r="3773" b="3466">S.;</wd>

<space/>

<wd l="3840" t="3298" r="4397" b="3466">Nunes,</wd>

<space/>

<wd l="4459" t="3298" r="4675" b="3437">M.</wd>

<space/>

<wd l="4752" t="3293" r="4930" b="3437">G.</wd>

<space/>

<wd l="4997" t="3298" r="5179" b="3437">V.</wd>

<space/>

<wd l="5256" t="3293" r="5779" b="3475">(2014)</wd>

<space/>

</ln>

<ln l="1651" t="3523" r="5774" b="3706" baseLine="3662" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1651" t="3523" r="2338" b="3667">Towards</wd>

<space/>

<wd l="2472" t="3566" r="2558" b="3667">a</wd>

<space/>

<wd l="2683" t="3523" r="3379" b="3667">Phonetic</wd>

<space/>

<wd l="3504" t="3523" r="4229" b="3667">Brazilian</wd>

<space/>

<wd l="4358" t="3528" r="5242" b="3706">Portuguese</wd>

<space/>

<wd l="5376" t="3523" r="5774" b="3706">Spell</wd>

<space/>

</ln>

<ln l="1651" t="3754" r="5794" b="3936" baseLine="3893">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1651" t="3754" r="2347" b="3898">Checker.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="2486" t="3758" r="3365" b="3936">TorPorEsp</wd>

<space/>

<wd l="3490" t="3840" r="3547" b="3859">-</wd>

<space/>

<wd l="3696" t="3754" r="4478" b="3936">Workshop</wd>

<space/>

<wd l="4608" t="3802" r="4800" b="3898">on</wd>

<space/>

<wd l="4930" t="3754" r="5366" b="3898">Tools</wd>

<space/>

<wd l="5486" t="3754" r="5794" b="3898">and</wd>

<space/>

</run>

</ln>

<ln l="1642" t="3984" r="5779" b="4166" baseLine="4123" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1642" t="3989" r="2467" b="4128">Resources</wd>

<space/>

<wd l="2467" t="3984" r="2746" b="4166">for</wd>

<space/>

<wd l="2770" t="3984" r="3907" b="4166">Automatically</wd>

<space/>

<wd l="3941" t="3989" r="4838" b="4166">Processing</wd>

<space/>

<wd l="4867" t="3989" r="5779" b="4166">Portuguese</wd>

</ln>

</para>

<para l="1646" t="4214" r="5770" b="4858" alignment="left" li="216" spaceBefore="1" lsp="exactly" lspExact="228" language="en">

<tabs position="1646"/>

<tabs alignment="left" position="4858" leaderChar=" "/>

<tabs alignment="right" position="4392" leaderChar=" "/>

<ln l="1651" t="4214" r="5770" b="4397" baseLine="4354" forcedEOF="true">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1651" t="4214" r="1958" b="4358">and</wd>

<tab position="1958"/>

<wd l="2674" t="4214" r="3336" b="4397">Spanish.</wd>

<tab position="3336"/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4085" t="4214" r="4858" b="4358">Available</wd>

<tab position="4858"/>

<wd l="5592" t="4234" r="5770" b="4358">at:
</wd>

</run>

</ln>

<ln l="1646" t="4445" r="5731" b="4627" baseLine="4579" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="1646" t="4445" r="5731" b="4627">http://www.lbd.dcc.ufmg.br/bdbcomp/servlet/Even</wd>

<space/>

</ln>

<ln l="1646" t="4675" r="2616" b="4858" baseLine="4810" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="1646" t="4675" r="2616" b="4858">to?id=755.).</wd>

</ln>

</para>

<para l="1421" t="5026" r="5784" b="6355" alignment="justified" li="216" spaceBefore="107" fli="-216" lsp="exactly" lspExact="233" language="en">

<ln l="1421" t="5026" r="5784" b="5208" baseLine="5160" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="5026" r="2083" b="5203">Avanço,</wd>

<space/>

<wd l="2131" t="5030" r="2290" b="5170">L.</wd>

<space/>

<wd l="2342" t="5030" r="2582" b="5198">V.;</wd>

<space/>

<wd l="2630" t="5030" r="3187" b="5198">Nunes,</wd>

<space/>

<wd l="3235" t="5030" r="3451" b="5170">M.</wd>

<space/>

<wd l="3509" t="5026" r="3686" b="5170">G.</wd>

<space/>

<wd l="3744" t="5030" r="3926" b="5170">V.</wd>

<space/>

<wd l="3984" t="5026" r="4550" b="5208">(2014).</wd>

<space/>

<wd l="4608" t="5026" r="5784" b="5170">Lexicon-based</wd>

<space/>

</ln>

<ln l="1656" t="5256" r="5784" b="5438" baseLine="5390" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="5256" r="2424" b="5400">sentiment</wd>

<space/>

<wd l="2568" t="5256" r="3202" b="5438">analysis</wd>

<space/>

<wd l="3355" t="5256" r="3586" b="5400">for</wd>

<space/>

<wd l="3725" t="5256" r="4339" b="5400">reviews</wd>

<space/>

<wd l="4493" t="5256" r="4675" b="5400">of</wd>

<space/>

<wd l="4786" t="5256" r="5482" b="5438">products</wd>

<space/>

<wd l="5630" t="5256" r="5784" b="5395">in</wd>

<space/>

</ln>

<ln l="1646" t="5486" r="5779" b="5669" baseLine="5621">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1646" t="5486" r="2376" b="5630">Brazilian</wd>

<space/>

<wd l="2443" t="5491" r="3370" b="5669">Portuguese.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3442" t="5486" r="4445" b="5669">Proceedings</wd>

<space/>

<wd l="4517" t="5486" r="4709" b="5669">of</wd>

<space/>

<wd l="4738" t="5486" r="4973" b="5630">the</wd>

<space/>

<wd l="5035" t="5486" r="5779" b="5630">Brazilian</wd>

<space/>

</run>

</ln>

<ln l="1656" t="5717" r="5779" b="5899" baseLine="5851">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1656" t="5717" r="2563" b="5899">Conference</wd>

<space/>

<wd l="2698" t="5765" r="2890" b="5861">on</wd>

<space/>

<wd l="3014" t="5717" r="3854" b="5899">Intelligent</wd>

<space/>

<wd l="3974" t="5717" r="4603" b="5899">Systems</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4738" t="5717" r="5582" b="5899">(BRACIS)</wd>

<space/>

<wd l="5722" t="5803" r="5779" b="5822">-</wd>

<space/>

</run>

</ln>

<ln l="1646" t="5942" r="5774" b="6115" baseLine="6082" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="5942" r="2093" b="6115">2014,</wd>

<space/>

<wd l="2213" t="5942" r="2856" b="6086">October</wd>

<space/>

<wd l="2981" t="5942" r="3475" b="6115">18-23,</wd>

<space/>

<wd l="3590" t="5942" r="4032" b="6115">2014,</wd>

<space/>

<wd l="4152" t="5942" r="4306" b="6082">in</wd>

<space/>

<wd l="4426" t="5942" r="4709" b="6086">São</wd>

<space/>

<wd l="4829" t="5942" r="5386" b="6115">Carlos,</wd>

<space/>

<wd l="5515" t="5942" r="5774" b="6115">SP,</wd>

<space/>

</ln>

<ln l="1646" t="6173" r="3274" b="6355" baseLine="6312">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1646" t="6173" r="2179" b="6346">Brazil,</wd>

<space/>

<wd l="2232" t="6216" r="2472" b="6355">pp.</wd>

<space/>

</run>

<wd l="2534" t="6173" r="3274" b="6317"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">277</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">–</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">281.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

<para l="1421" t="6523" r="5779" b="7166" alignment="justified" li="216" spaceBefore="111" fli="-216" lsp="exactly" lspExact="233" language="en">

<ln l="1421" t="6523" r="5779" b="6706" baseLine="6662" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="6523" r="1747" b="6696">Aw,</wd>

<space/>

<wd l="1829" t="6523" r="2064" b="6696">A.;</wd>

<space/>

<wd l="2150" t="6523" r="2702" b="6706">Zhang,</wd>

<space/>

<wd l="2784" t="6528" r="3053" b="6696">M.;</wd>

<space/>

<wd l="3139" t="6523" r="3571" b="6696">Xiao,</wd>

<space/>

<wd l="3648" t="6523" r="4085" b="6696">J.;Su,</wd>

<space/>

<wd l="4166" t="6528" r="4282" b="6667">J.</wd>

<space/>

<wd l="4373" t="6523" r="4896" b="6706">(2006)</wd>

<space/>

<wd l="4973" t="6523" r="5117" b="6662">A</wd>

<space/>

<wd l="5184" t="6523" r="5779" b="6706">phrase-</wd>

</ln>

<ln l="1642" t="6754" r="5770" b="6898" baseLine="6893" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="6754" r="2102" b="6898">based</wd>

<space/>

<wd l="2165" t="6754" r="2909" b="6898">statistical</wd>

<space/>

<wd l="2966" t="6754" r="3461" b="6898">model</wd>

<space/>

<wd l="3528" t="6754" r="3758" b="6898">for</wd>

<space/>

<wd l="3821" t="6754" r="4200" b="6898">SMS</wd>

<space/>

<wd l="4262" t="6773" r="4565" b="6898">text</wd>

<space/>

<wd l="4622" t="6754" r="5770" b="6898">normalization.</wd>

<space/>

</ln>

<ln l="1651" t="6984" r="4838" b="7166" baseLine="7123">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1651" t="6989" r="1814" b="7123">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1858" t="6984" r="2861" b="7166">Proceedings</wd>

<space/>

<wd l="2918" t="6984" r="3110" b="7166">of</wd>

<space/>

<wd l="3130" t="6984" r="3854" b="7157">COLING</wd>

<space/>

</run>

<wd l="3898" t="6984" r="4344" b="7128"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">2006</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="4406" t="6984" r="4838" b="7128">ACL.</wd>

</run>

</ln>

</para>

<para l="1421" t="7334" r="5784" b="8669" alignment="justified" li="216" spaceBefore="98" fli="-216" lsp="exactly" lspExact="233" language="en">

<ln l="1421" t="7334" r="5770" b="7517" baseLine="7474" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="7334" r="1795" b="7507">Bali,</wd>

<space/>

<wd l="1944" t="7339" r="2117" b="7478">R.</wd>

<space/>

<wd l="2275" t="7334" r="2798" b="7517">(2013)</wd>

<space/>

<wd l="2947" t="7334" r="3091" b="7474">A</wd>

<space/>

<wd l="3235" t="7334" r="4138" b="7478">Theoretical</wd>

<space/>

<wd l="4282" t="7334" r="4891" b="7478">Review</wd>

<space/>

<wd l="5040" t="7378" r="5237" b="7478">on</wd>

<space/>

<wd l="5386" t="7334" r="5770" b="7478">SMS</wd>

<space/>

</ln>

<ln l="1642" t="7565" r="5774" b="7747" baseLine="7699" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1642" t="7565" r="2798" b="7709">Normalization</wd>

<space/>

<wd l="2981" t="7565" r="3413" b="7747">using</wd>

<space/>

<wd l="3595" t="7565" r="4186" b="7709">Hidden</wd>

<space/>

<wd l="4368" t="7565" r="4997" b="7709">Markov</wd>

<space/>

<wd l="5179" t="7565" r="5774" b="7709">Models</wd>

<space/>

</ln>

<ln l="1651" t="7795" r="5784" b="7978" baseLine="7930">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1651" t="7795" r="2395" b="7978">(HMMs).</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="2434" t="7795" r="3509" b="7939">International</wd>

<space/>

<wd l="3528" t="7795" r="4166" b="7939">Journal</wd>

<space/>

<wd l="4205" t="7795" r="4397" b="7978">of</wd>

<space/>

<wd l="4402" t="7795" r="5194" b="7978">Computer</wd>

<space/>

<wd l="5237" t="7795" r="5784" b="7939">Trends</wd>

<space/>

</run>

</ln>

<ln l="1651" t="8026" r="5784" b="8208" baseLine="8160">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1651" t="8026" r="1958" b="8170">and</wd>

<space/>

<wd l="2040" t="8026" r="2966" b="8208">Technology</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="3053" t="8026" r="3744" b="8208">(IJCTT),</wd>

<space/>

<wd l="3835" t="8026" r="4128" b="8170">V.4</wd>

<space/>

<wd l="4219" t="8026" r="5362" b="8208">(7):2388-2387</wd>

<space/>

<wd l="5453" t="8026" r="5784" b="8208">July</wd>

<space/>

</run>

</ln>

<ln l="1651" t="8256" r="5770" b="8438" baseLine="8390" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1651" t="8261" r="2050" b="8400">Issue</wd>

<space/>

<wd l="2122" t="8256" r="2510" b="8400">2013</wd>

<space/>

<wd l="2602" t="8256" r="3072" b="8400">.ISSN</wd>

<space/>

<wd l="3139" t="8256" r="5770" b="8438">2231-2803.www.ijcttjournal.org.</wd>

<space/>

</ln>

<ln l="1646" t="8486" r="5275" b="8669" baseLine="8621" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1646" t="8486" r="2434" b="8630">Published</wd>

<space/>

<wd l="2477" t="8486" r="2688" b="8669">by</wd>

<space/>

<wd l="2741" t="8486" r="3379" b="8630">Seventh</wd>

<space/>

<wd l="3437" t="8486" r="3893" b="8630">Sense</wd>

<space/>

<wd l="3946" t="8486" r="4675" b="8630">Research</wd>

<space/>

<wd l="4728" t="8486" r="5275" b="8669">Group.</wd>

</ln>

</para>

<para l="1421" t="8837" r="5784" b="9936" alignment="justified" li="216" spaceBefore="114" fli="-216" lsp="exactly" lspExact="232" language="en">

<ln l="1421" t="8837" r="5784" b="9019" baseLine="8971" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="8837" r="2246" b="9010">Bildhauer,</wd>

<space/>

<wd l="2309" t="8842" r="2515" b="9010">F.;</wd>

<space/>

<wd l="2592" t="8837" r="3235" b="9010">Schäfer,</wd>

<space/>

<wd l="3298" t="8842" r="3470" b="8981">R.</wd>

<space/>

<wd l="3538" t="8837" r="4066" b="9019">(2013)</wd>

<space/>

<wd l="4128" t="8837" r="5093" b="8981">Token-level</wd>

<space/>

<wd l="5150" t="8837" r="5568" b="8981">noise</wd>

<space/>

<wd l="5630" t="8837" r="5784" b="8976">in</wd>

<space/>

</ln>

<ln l="1651" t="9062" r="5784" b="9245" baseLine="9202" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="9062" r="2040" b="9245">large</wd>

<space/>

<wd l="2347" t="9062" r="2722" b="9206">Web</wd>

<space/>

<wd l="3038" t="9106" r="3648" b="9245">corpora</wd>

<space/>

<wd l="3955" t="9062" r="4234" b="9206">and</wd>

<space/>

<wd l="4541" t="9062" r="5784" b="9206">non-destructive</wd>

<space/>

</ln>

<ln l="1646" t="9293" r="5770" b="9475" baseLine="9432" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="9293" r="2755" b="9437">normalization</wd>

<space/>

<wd l="2962" t="9293" r="3197" b="9437">for</wd>

<space/>

<wd l="3398" t="9293" r="4133" b="9475">linguistic</wd>

<space/>

<wd l="4344" t="9293" r="5342" b="9475">applications.</wd>

<space/>

<wd l="5563" t="9298" r="5770" b="9437">In:</wd>

<space/>

</ln>

<ln l="1642" t="9523" r="5779" b="9706" baseLine="9662" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="9523" r="2645" b="9706">Proceedings</wd>

<space/>

<wd l="2726" t="9523" r="2918" b="9706">of</wd>

<space/>

<wd l="2962" t="9523" r="3538" b="9706">Corpus</wd>

<space/>

<wd l="3605" t="9523" r="4282" b="9706">Analysis</wd>

<space/>

<wd l="4358" t="9523" r="4699" b="9667">with</wd>

<space/>

<wd l="4766" t="9528" r="5227" b="9696">Noise</wd>

<space/>

<wd l="5309" t="9533" r="5458" b="9667">in</wd>

<space/>

<wd l="5539" t="9523" r="5779" b="9667">the</wd>

<space/>

</ln>

<ln l="1646" t="9754" r="3365" b="9936" baseLine="9893">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1646" t="9754" r="2165" b="9936">Signal</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2213" t="9754" r="2794" b="9936">(CANS</wd>

<space/>

<wd l="2856" t="9754" r="3365" b="9936">2013).</wd>

</run>

</ln>

</para>

<para l="1421" t="10104" r="5789" b="11208" alignment="justified" li="216" spaceBefore="100" fli="-216" lsp="exactly" lspExact="233" language="en">

<ln l="1421" t="10104" r="5779" b="10286" baseLine="10243" underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1421" t="10109" r="1939" b="10277">Boros,</wd>

<space/>

<wd l="2016" t="10109" r="2232" b="10277">T.;</wd>

<space/>

<wd l="2318" t="10104" r="3216" b="10277">Stefănescu,</wd>

<space/>

<wd l="3288" t="10109" r="3523" b="10277">D.;</wd>

<space/>

<wd l="3610" t="10109" r="3912" b="10277">Ion,</wd>

<space/>

<wd l="3989" t="10109" r="4157" b="10248">R.</wd>

<space/>

<wd l="4243" t="10104" r="4766" b="10286">(2012)</wd>

<space/>

<wd l="4838" t="10104" r="5611" b="10277">Bermuda,</wd>

<space/>

<wd l="5693" t="10147" r="5779" b="10248">a</wd>

<space/>

</ln>

<ln l="1651" t="10334" r="5774" b="10517" baseLine="10469" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1651" t="10334" r="2554" b="10478">data-driven</wd>

<space/>

<wd l="2606" t="10334" r="2918" b="10478">tool</wd>

<space/>

<wd l="2981" t="10334" r="3211" b="10478">for</wd>

<space/>

<wd l="3259" t="10334" r="3946" b="10517">phonetic</wd>

<space/>

<wd l="4003" t="10334" r="5016" b="10517">transcription</wd>

<space/>

<wd l="5074" t="10334" r="5261" b="10478">of</wd>

<space/>

<wd l="5294" t="10334" r="5774" b="10478">words</wd>

<space/>

</ln>

<ln l="1651" t="10565" r="5789" b="10747" baseLine="10699">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1651" t="10565" r="2006" b="10709">info.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="2074" t="10565" r="3077" b="10747">Proceedings</wd>

<space/>

<wd l="3144" t="10565" r="3336" b="10747">of</wd>

<space/>

<wd l="3346" t="10565" r="3984" b="10738">Natural</wd>

<space/>

<wd l="4022" t="10570" r="4834" b="10747">Language</wd>

<space/>

<wd l="4891" t="10570" r="5789" b="10747">Processing</wd>

<space/>

</run>

</ln>

<ln l="1608" t="10795" r="5779" b="10978" baseLine="10930">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1608" t="10795" r="1882" b="10978">for</wd>

<space/>

<wd l="1997" t="10800" r="2846" b="10978">Improving</wd>

<space/>

<wd l="2981" t="10795" r="3581" b="10939">Textual</wd>

<space/>

<wd l="3691" t="10795" r="4714" b="10978">Accessibility</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="4843" t="10795" r="5779" b="10978">(NLP4ITA)</wd>

<space/>

</run>

</ln>

<ln l="1646" t="11026" r="2515" b="11208" baseLine="11160" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1646" t="11026" r="2515" b="11208">Workshop.</wd>

</ln>

</para>

<para l="1426" t="11376" r="5784" b="12206" alignment="justified" li="216" spaceBefore="110" fli="-216" lsp="exactly" lspExact="233" language="en">

<ln l="1426" t="11376" r="5784" b="11558" baseLine="11510" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1426" t="11376" r="2093" b="11549">Carletta,</wd>

<space/>

<wd l="2314" t="11381" r="2434" b="11520">J.</wd>

<space/>

<wd l="2664" t="11376" r="3235" b="11558">(1996).</wd>

<space/>

<wd l="3461" t="11376" r="4258" b="11558">Assessing</wd>

<space/>

<wd l="4478" t="11376" r="5366" b="11558">Agreement</wd>

<space/>

<wd l="5587" t="11419" r="5784" b="11520">on</wd>

<space/>

</ln>

<ln l="1651" t="11606" r="5770" b="11789" baseLine="11741" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="11606" r="2746" b="11750">Classification</wd>

<space/>

<wd l="2995" t="11606" r="3499" b="11750">Tasks:</wd>

<space/>

<wd l="3763" t="11606" r="4070" b="11750">The</wd>

<space/>

<wd l="4320" t="11611" r="4843" b="11789">Kappa</wd>

<space/>

<wd l="5098" t="11606" r="5770" b="11750">Statistic.</wd>

<space/>

</ln>

<ln l="1656" t="11832" r="5784" b="12014" baseLine="11971">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1656" t="11832" r="2856" b="12014">Computational</wd>

<space/>

</run>

<wd l="2904" t="11837" r="3835" b="12014"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Linguistics</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="3907" t="11832" r="4205" b="11976">vol.</wd>

<space/>

<wd l="4282" t="11832" r="4526" b="12005">22,</wd>

<space/>

<wd l="4603" t="11875" r="4738" b="11976">n.</wd>

<space/>

<wd l="4814" t="11832" r="4958" b="12005">2,</wd>

<space/>

<wd l="5030" t="11875" r="5270" b="12014">pp.</wd>

<space/>

<wd l="5352" t="11832" r="5784" b="11976">249--</wd>

<space/>

</run>

</ln>

<ln l="1646" t="12062" r="1987" b="12206" baseLine="12202" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="12062" r="1987" b="12206">254.</wd>

</ln>

</para>

<para l="1426" t="12355" r="5784" b="13454" alignment="justified" li="216" spaceBefore="49" fli="-216" lsp="exactly" lspExact="232" language="en">

<ln l="1426" t="12355" r="5784" b="12538" baseLine="12490">

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1426" t="12355" r="2194" b="12538">Chrupała,</wd>

<space/>

<wd l="2309" t="12355" r="2486" b="12499">G.</wd>

<space/>

<wd l="2611" t="12355" r="3178" b="12538">(2014).</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3288" t="12355" r="4296" b="12538">Normalizing</wd>

<space/>

<wd l="4406" t="12374" r="4910" b="12499">tweets</wd>

<space/>

<wd l="5021" t="12355" r="5376" b="12499">with</wd>

<space/>

<wd l="5486" t="12355" r="5784" b="12499">edit</wd>

<space/>

</run>

</ln>

<ln l="1656" t="12586" r="5784" b="12768" baseLine="12720" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1656" t="12586" r="2160" b="12768">scripts</wd>

<space/>

<wd l="2357" t="12586" r="2640" b="12730">and</wd>

<space/>

<wd l="2822" t="12605" r="3547" b="12730">recurrent</wd>

<space/>

<wd l="3730" t="12586" r="4224" b="12730">neural</wd>

<space/>

<wd l="4421" t="12586" r="5419" b="12768">embeddings.</wd>

<space/>

<wd l="5621" t="12590" r="5784" b="12725">In</wd>

<space/>

</ln>

<ln l="1642" t="12816" r="5779" b="12998" baseLine="12950" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="12816" r="2645" b="12998">Proceedings</wd>

<space/>

<wd l="2750" t="12816" r="2942" b="12998">of</wd>

<space/>

<wd l="3010" t="12816" r="3245" b="12960">the</wd>

<space/>

<wd l="3355" t="12816" r="3758" b="12960">52nd</wd>

<space/>

<wd l="3840" t="12816" r="4430" b="12960">Annual</wd>

<space/>

<wd l="4517" t="12821" r="5184" b="12998">Meeting</wd>

<space/>

<wd l="5285" t="12816" r="5477" b="12998">of</wd>

<space/>

<wd l="5544" t="12816" r="5779" b="12960">the</wd>

<space/>

</ln>

<ln l="1637" t="13046" r="5779" b="13229" baseLine="13181">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1637" t="13046" r="2573" b="13190">Association</wd>

<space/>

<wd l="2587" t="13046" r="2861" b="13229">for</wd>

<space/>

<wd l="2914" t="13046" r="4109" b="13229">Computational</wd>

<space/>

<wd l="4138" t="13051" r="5021" b="13229">Linguistics</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="5078" t="13046" r="5779" b="13229">(Volume</wd>

<space/>

</run>

</ln>

<ln l="1646" t="13272" r="3998" b="13454" baseLine="13411" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="13272" r="1790" b="13416">2:</wd>

<space/>

<wd l="1862" t="13272" r="2285" b="13416">Short</wd>

<space/>

<wd l="2333" t="13272" r="2981" b="13454">Papers),</wd>

<space/>

<wd l="3029" t="13315" r="3274" b="13454">pp.</wd>

<space/>

<wd l="3341" t="13272" r="3998" b="13416">680-686</wd>

</ln>

</para>

<para l="1426" t="13565" r="5779" b="14429" alignment="justified" li="216" spaceBefore="49" fli="-216" lsp="exactly" lspExact="233" language="en">

<ln l="1426" t="13565" r="5779" b="13747" baseLine="13699" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="13565" r="1901" b="13738">Clark,</wd>

<space/>

<wd l="1982" t="13570" r="2198" b="13738">E.,</wd>

<space/>

<wd l="2285" t="13565" r="2429" b="13709">&amp;</wd>

<space/>

<wd l="2506" t="13565" r="3000" b="13738">Araki,</wd>

<space/>

<wd l="3082" t="13570" r="3264" b="13709">K.</wd>

<space/>

<wd l="3350" t="13565" r="3922" b="13747">(2011).</wd>

<space/>

<wd l="4008" t="13570" r="4373" b="13709">Text</wd>

<space/>

<wd l="4445" t="13565" r="5549" b="13709">normalization</wd>

<space/>

<wd l="5626" t="13565" r="5779" b="13704">in</wd>

<space/>

</ln>

<ln l="1656" t="13795" r="5774" b="13978" baseLine="13930" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1656" t="13795" r="2107" b="13939">social</wd>

<space/>

<wd l="2194" t="13795" r="2722" b="13939">media:</wd>

<space/>

<wd l="2813" t="13838" r="3538" b="13978">progress,</wd>

<space/>

<wd l="3619" t="13795" r="4358" b="13978">problems</wd>

<space/>

<wd l="4450" t="13795" r="4733" b="13939">and</wd>

<space/>

<wd l="4819" t="13795" r="5774" b="13978">applications</wd>

<space/>

</ln>

<ln l="1651" t="14026" r="5770" b="14208" baseLine="14160" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1651" t="14026" r="1882" b="14170">for</wd>

<space/>

<wd l="2011" t="14069" r="2098" b="14170">a</wd>

<space/>

<wd l="2218" t="14026" r="3394" b="14208">pre-processing</wd>

<space/>

<wd l="3533" t="14045" r="4080" b="14208">system</wd>

<space/>

<wd l="4210" t="14026" r="4397" b="14170">of</wd>

<space/>

<wd l="4502" t="14026" r="4992" b="14170">casual</wd>

<space/>

<wd l="5122" t="14026" r="5770" b="14208">English.</wd>

<space/>

</ln>

<ln l="1642" t="14256" r="5774" b="14429" baseLine="14390">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1642" t="14256" r="2952" b="14400">Procedia-Social</wd>

<space/>

<wd l="3000" t="14256" r="3307" b="14400">and</wd>

<space/>

<wd l="3341" t="14256" r="4238" b="14400">Behavioral</wd>

<space/>

</run>

<wd l="4282" t="14256" r="5016" b="14429"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">Sciences</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="5074" t="14256" r="5314" b="14429">27,</wd>

<space/>

<wd l="5371" t="14256" r="5774" b="14400">2-11.</wd>

</run>

</ln>

</para>

<para l="1426" t="14602" r="5784" b="15206" alignment="justified" li="216" spaceBefore="111" spaceAfter="111" fli="-216" lsp="exactly" lspExact="233" language="en">

<ln l="1426" t="14602" r="5770" b="14784" baseLine="14741" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1426" t="14602" r="2117" b="14774">Condori,</wd>

<space/>

<wd l="2179" t="14606" r="2347" b="14746">R.</wd>

<space/>

<wd l="2414" t="14606" r="2578" b="14746">E.</wd>

<space/>

<wd l="2645" t="14606" r="2856" b="14774">L.;</wd>

<space/>

<wd l="2923" t="14602" r="3432" b="14774">Pardo,</wd>

<space/>

<wd l="3494" t="14606" r="3653" b="14746">T.</wd>

<space/>

<wd l="3720" t="14602" r="3902" b="14746">A.</wd>

<space/>

<wd l="3979" t="14602" r="4118" b="14746">S.</wd>

<space/>

<wd l="4186" t="14602" r="4709" b="14784">(2015)</wd>

<space/>

<wd l="4771" t="14602" r="5770" b="14784">Experiments</wd>

<space/>

</ln>

<ln l="1651" t="14832" r="5784" b="15014" baseLine="14971" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1651" t="14875" r="1848" b="14976">on</wd>

<space/>

<wd l="1901" t="14832" r="2606" b="14976">Sentence</wd>

<space/>

<wd l="2654" t="14832" r="3451" b="15014">Boundary</wd>

<space/>

<wd l="3490" t="14832" r="4272" b="14976">Detection</wd>

<space/>

<wd l="4320" t="14832" r="4474" b="14971">in</wd>

<space/>

<wd l="4517" t="14832" r="5784" b="14976">User-Generated</wd>

<space/>

</ln>

<ln l="1646" t="15062" r="5784" b="15206" baseLine="15202" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1646" t="15062" r="2021" b="15206">Web</wd>

<space/>

<wd l="2088" t="15062" r="2750" b="15206">Content.</wd>

<space/>

<wd l="2827" t="15067" r="3034" b="15206">In:</wd>

<space/>

<wd l="3125" t="15062" r="3461" b="15206">16th</wd>

<space/>

<wd l="3523" t="15062" r="4536" b="15206">International</wd>

<space/>

<wd l="4603" t="15062" r="5520" b="15206">Conference</wd>

<space/>

<wd l="5587" t="15106" r="5784" b="15206">on</wd>

</ln>

</para>

</column>

<column l="6120" t="1416" r="10531" b="15378">

<para l="6341" t="1459" r="10493" b="2563" alignment="justified" li="216" lsp="exactly" lspExact="230" language="en">

<ln l="6355" t="1459" r="10483" b="1642" baseLine="1594" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="1459" r="7171" b="1642">Intelligent</wd>

<space/>

<wd l="7325" t="1464" r="7690" b="1603">Text</wd>

<space/>

<wd l="7838" t="1459" r="8702" b="1642">Processing</wd>

<space/>

<wd l="8866" t="1459" r="9149" b="1603">and</wd>

<space/>

<wd l="9302" t="1459" r="10483" b="1642">Computational</wd>

<space/>

</ln>

<ln l="6350" t="1690" r="10483" b="1872" baseLine="1824">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6350" t="1690" r="7282" b="1872">Linguistics,</wd>

<space/>

<wd l="7368" t="1690" r="7814" b="1862">2015,</wd>

<space/>

<wd l="7910" t="1690" r="8390" b="1834">Cairo.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="8482" t="1690" r="9485" b="1872">Proceedings</wd>

<space/>

<wd l="9576" t="1690" r="9763" b="1872">of</wd>

<space/>

<wd l="9811" t="1690" r="10051" b="1834">the</wd>

<space/>

<wd l="10142" t="1690" r="10483" b="1834">16th</wd>

<space/>

</run>

</ln>

<ln l="6341" t="1920" r="10493" b="2102" baseLine="2054" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6341" t="1920" r="7416" b="2064">International</wd>

<space/>

<wd l="7618" t="1920" r="8525" b="2102">Conference</wd>

<space/>

<wd l="8726" t="1968" r="8918" b="2064">on</wd>

<space/>

<wd l="9110" t="1920" r="9950" b="2102">Intelligent</wd>

<space/>

<wd l="10152" t="1925" r="10493" b="2064">Text</wd>

<space/>

</ln>

<ln l="6346" t="2150" r="10478" b="2333" baseLine="2285">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6346" t="2155" r="7243" b="2333">Processing</wd>

<space/>

<wd l="7286" t="2150" r="7594" b="2294">and</wd>

<space/>

<wd l="7637" t="2150" r="8832" b="2333">Computational</wd>

<space/>

</run>

<wd l="8861" t="2155" r="9792" b="2333"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Linguistics</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="9845" t="2150" r="10286" b="2294">2015.</wd>

<space/>

<wd l="10344" t="2198" r="10478" b="2294">v.</wd>

<space/>

</run>

</ln>

<ln l="6355" t="2381" r="7757" b="2563" baseLine="2515" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="2381" r="6792" b="2525">9041.</wd>

<space/>

<wd l="6845" t="2424" r="6989" b="2563">p.</wd>

<space/>

<wd l="7051" t="2381" r="7757" b="2525">227-237.</wd>

</ln>

</para>

<para l="6130" t="2731" r="10493" b="3600" alignment="justified" li="216" spaceBefore="110" fli="-216" lsp="exactly" lspExact="233" language="en">

<ln l="6130" t="2731" r="10469" b="2914" baseLine="2866" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6130" t="2731" r="7018" b="2904">Contractor,</wd>

<space/>

<wd l="7138" t="2736" r="7378" b="2904">D.;</wd>

<space/>

<wd l="7502" t="2731" r="8251" b="2914">Faruquie,</wd>

<space/>

<wd l="8376" t="2736" r="8534" b="2875">T.</wd>

<space/>

<wd l="8654" t="2731" r="8890" b="2904">A.;</wd>

<space/>

<wd l="9024" t="2731" r="10166" b="2904">Subramaniam,</wd>

<space/>

<wd l="10286" t="2736" r="10469" b="2875">V.</wd>

<space/>

</ln>

<ln l="6355" t="2957" r="10493" b="3139" baseLine="3096">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6355" t="2957" r="6878" b="3139">(2010)</wd>

<space/>

<wd l="6926" t="2957" r="8026" b="3139">Unsupervised</wd>

<space/>

<wd l="8074" t="2957" r="8822" b="3139">cleansing</wd>

<space/>

<wd l="8870" t="2957" r="9058" b="3101">of</wd>

<space/>

<wd l="9077" t="2957" r="9509" b="3139">noisy</wd>

<space/>

<wd l="9552" t="2976" r="9888" b="3101">text.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="9955" t="2957" r="10493" b="3139">Coling</wd>

<space/>

</run>

</ln>

<ln l="6350" t="3187" r="10483" b="3370" baseLine="3326">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6350" t="3187" r="6802" b="3331">2010:</wd>

<space/>

<wd l="6941" t="3192" r="7474" b="3331">Poster</wd>

<space/>

</run>

<wd l="7622" t="3187" r="8256" b="3360"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Volume</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="8386" t="3230" r="8837" b="3370">pages</wd>

<space/>

</run>

<wd l="8990" t="3187" r="9720" b="3360"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">189</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">–</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">196,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="9854" t="3187" r="10483" b="3370">Beijing,</wd>

<space/>

</run>

</ln>

<ln l="6350" t="3418" r="7416" b="3600" baseLine="3557" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="3418" r="6931" b="3600">August</wd>

<space/>

<wd l="6974" t="3418" r="7416" b="3562">2010.</wd>

</ln>

</para>

<para l="6125" t="3768" r="10488" b="5563" alignment="justified" li="216" spaceBefore="94" fli="-216" lsp="exactly" lspExact="233" language="en">

<ln l="6125" t="3768" r="10474" b="3946" baseLine="3907" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="3773" r="6662" b="3941">Duran,</wd>

<space/>

<wd l="6725" t="3773" r="6941" b="3912">M.</wd>

<space/>

<wd l="7018" t="3768" r="7210" b="3941">S.;</wd>

<space/>

<wd l="7277" t="3768" r="7944" b="3946">Avanço,</wd>

<space/>

<wd l="8006" t="3773" r="8165" b="3912">L.</wd>

<space/>

<wd l="8232" t="3773" r="8467" b="3941">V.,</wd>

<space/>

<wd l="8530" t="3768" r="9038" b="3941">Pardo,</wd>

<space/>

<wd l="9106" t="3773" r="9264" b="3912">T.</wd>

<space/>

<wd l="9326" t="3768" r="9509" b="3912">A.</wd>

<space/>

<wd l="9586" t="3768" r="9778" b="3941">S.;</wd>

<space/>

<wd l="9845" t="3768" r="10474" b="3941">Aluísio,</wd>

<space/>

</ln>

<ln l="6360" t="3998" r="10483" b="4171" baseLine="4138" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6360" t="3998" r="6499" b="4142">S.</wd>

<space/>

<wd l="6638" t="4003" r="6912" b="4171">M.;</wd>

<space/>

<wd l="7046" t="4003" r="7603" b="4171">Nunes,</wd>

<space/>

<wd l="7733" t="4003" r="7949" b="4142">M.</wd>

<space/>

<wd l="8093" t="3998" r="8270" b="4142">G.</wd>

<space/>

<wd l="8410" t="4003" r="8592" b="4142">V.</wd>

<space/>

<wd l="8741" t="3998" r="9182" b="4142">Some</wd>

<space/>

<wd l="9317" t="3998" r="9782" b="4142">issues</wd>

<space/>

<wd l="9922" t="4042" r="10118" b="4142">on</wd>

<space/>

<wd l="10243" t="3998" r="10483" b="4142">the</wd>

<space/>

</ln>

<ln l="6350" t="4229" r="10483" b="4411" baseLine="4363" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="4229" r="7459" b="4373">normalization</wd>

<space/>

<wd l="7550" t="4229" r="7738" b="4373">of</wd>

<space/>

<wd l="7800" t="4272" r="7886" b="4373">a</wd>

<space/>

<wd l="7978" t="4272" r="8501" b="4411">corpus</wd>

<space/>

<wd l="8597" t="4229" r="8784" b="4373">of</wd>

<space/>

<wd l="8842" t="4229" r="9528" b="4411">products</wd>

<space/>

<wd l="9619" t="4229" r="10234" b="4373">reviews</wd>

<space/>

<wd l="10334" t="4229" r="10483" b="4368">in</wd>

<space/>

</ln>

<ln l="6350" t="4459" r="10488" b="4642" baseLine="4594" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="4464" r="7272" b="4642">Portuguese.</wd>

<space/>

<wd l="7363" t="4464" r="7570" b="4603">In:</wd>

<space/>

<wd l="7656" t="4459" r="8069" b="4603">Felix</wd>

<space/>

<wd l="8136" t="4459" r="8933" b="4603">Bildhauer</wd>

<space/>

<wd l="9005" t="4459" r="9149" b="4603">&amp;</wd>

<space/>

<wd l="9226" t="4459" r="9802" b="4603">Roland</wd>

<space/>

<wd l="9883" t="4459" r="10488" b="4603">Schäfer</wd>

<space/>

</ln>

<ln l="6355" t="4690" r="10483" b="4872" baseLine="4824">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6355" t="4690" r="6845" b="4872">(eds.),</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6970" t="4690" r="7973" b="4872">Proceedings</wd>

<space/>

<wd l="8102" t="4690" r="8294" b="4872">of</wd>

<space/>

<wd l="8381" t="4690" r="8616" b="4834">the</wd>

<space/>

<wd l="8746" t="4690" r="8990" b="4834">9th</wd>

<space/>

<wd l="9144" t="4690" r="9466" b="4834">Web</wd>

<space/>

<wd l="9600" t="4738" r="9768" b="4834">as</wd>

<space/>

<wd l="9902" t="4690" r="10483" b="4872">Corpus</wd>

<space/>

</run>

</ln>

<ln l="6374" t="4920" r="10483" b="5102" baseLine="5054">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6374" t="4920" r="7157" b="5102">Workshop</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7296" t="4920" r="7997" b="5102">(WaC-9)</wd>

<space/>

<wd l="8126" t="4920" r="8650" b="5064">EACL</wd>

<space/>

<wd l="8774" t="4920" r="9221" b="5093">2014,</wd>

<space/>

<wd l="9350" t="4963" r="9802" b="5102">pages</wd>

<space/>

</run>

<wd l="9936" t="4920" r="10483" b="5093"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">22</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">–</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">28,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

</ln>

<ln l="6355" t="5150" r="10483" b="5333" baseLine="5285" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="5150" r="7344" b="5333">Gothenburg,</wd>

<space/>

<wd l="7560" t="5150" r="8222" b="5323">Sweden,</wd>

<space/>

<wd l="8429" t="5150" r="8846" b="5333">April</wd>

<space/>

<wd l="9048" t="5150" r="9245" b="5294">26</wd>

<space/>

<wd l="9442" t="5150" r="9878" b="5294">2014.</wd>

<space/>

<wd l="10085" t="5150" r="10483" b="5294">2014</wd>

<space/>

</ln>

<ln l="6350" t="5381" r="9792" b="5563" baseLine="5515" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="5381" r="7296" b="5525">Association</wd>

<space/>

<wd l="7349" t="5381" r="7579" b="5525">for</wd>

<space/>

<wd l="7632" t="5381" r="8813" b="5563">Computational</wd>

<space/>

<wd l="8866" t="5381" r="9792" b="5563">Linguistics.</wd>

</ln>

</para>

<para l="6125" t="5726" r="10507" b="6600" alignment="justified" li="216" spaceBefore="114" fli="-216" lsp="exactly" lspExact="232" language="en">

<ln l="6125" t="5726" r="10478" b="5909" baseLine="5866" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="5731" r="6499" b="5899">Han,</wd>

<space/>

<wd l="6672" t="5726" r="7387" b="5899">B.;Cook,</wd>

<space/>

<wd l="7560" t="5731" r="7766" b="5899">P.;</wd>

<space/>

<wd l="7949" t="5726" r="8669" b="5899">Baldwin,</wd>

<space/>

<wd l="8846" t="5731" r="9005" b="5870">T.</wd>

<space/>

<wd l="9187" t="5726" r="9715" b="5909">(2013)</wd>

<space/>

<wd l="9888" t="5726" r="10478" b="5870">Lexical</wd>

<space/>

</ln>

<ln l="6346" t="5957" r="10507" b="6139" baseLine="6096">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6346" t="5957" r="7493" b="6101">Normalisation</wd>

<space/>

<wd l="7651" t="5957" r="7838" b="6101">of</wd>

<space/>

<wd l="7978" t="5957" r="8410" b="6101">Short</wd>

<space/>

<wd l="8568" t="5962" r="8928" b="6101">Text</wd>

<space/>

<wd l="9082" t="5962" r="9898" b="6139">Messages.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="10056" t="5957" r="10507" b="6101">ACM</wd>

<space/>

</run>

</ln>

<ln l="6360" t="6187" r="10488" b="6370" baseLine="6326" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6360" t="6192" r="7392" b="6331">Transactions</wd>

<space/>

<wd l="7440" t="6235" r="7632" b="6331">on</wd>

<space/>

<wd l="7666" t="6187" r="8506" b="6370">Intelligent</wd>

<space/>

<wd l="8539" t="6187" r="9168" b="6370">Systems</wd>

<space/>

<wd l="9216" t="6187" r="9523" b="6331">and</wd>

<space/>

<wd l="9562" t="6187" r="10488" b="6370">Technology</wd>

<space/>

</ln>

<ln l="6350" t="6418" r="7733" b="6600" baseLine="6557" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="6418" r="6730" b="6600">4(1),</wd>

<space/>

<wd l="6778" t="6461" r="7022" b="6600">pp.</wd>

<space/>

<wd l="7094" t="6418" r="7733" b="6562">5:15:27.</wd>

</ln>

</para>

<para l="6125" t="6768" r="10488" b="8102" alignment="justified" li="216" spaceBefore="107" fli="-216" lsp="exactly" lspExact="232" language="en">

<ln l="6125" t="6768" r="10474" b="6950" baseLine="6907" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6125" t="6773" r="6960" b="6941">Hartmann,</wd>

<space/>

<wd l="7013" t="6773" r="7200" b="6912">N.</wd>

<space/>

<wd l="7272" t="6768" r="7469" b="6941">S.;</wd>

<space/>

<wd l="7531" t="6768" r="8189" b="6946">Avanço.</wd>

<space/>

<wd l="8256" t="6773" r="8467" b="6941">L.;</wd>

<space/>

<wd l="8530" t="6768" r="9130" b="6950">Balage,</wd>

<space/>

<wd l="9187" t="6773" r="9336" b="6912">P.</wd>

<space/>

<wd l="9398" t="6773" r="9600" b="6941">P.;</wd>

<space/>

<wd l="9662" t="6773" r="10200" b="6941">Duran,</wd>

<space/>

<wd l="10258" t="6773" r="10474" b="6912">M.</wd>

<space/>

</ln>

<ln l="6360" t="6998" r="10474" b="7181" baseLine="7138" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6360" t="6998" r="6557" b="7171">S.;</wd>

<space/>

<wd l="6624" t="7003" r="7181" b="7171">Nunes,</wd>

<space/>

<wd l="7253" t="7003" r="7464" b="7142">M.</wd>

<space/>

<wd l="7546" t="6998" r="7723" b="7142">G.</wd>

<space/>

<wd l="7795" t="7003" r="8035" b="7171">V.;</wd>

<space/>

<wd l="8107" t="6998" r="8621" b="7171">Pardo,</wd>

<space/>

<wd l="8693" t="7003" r="8904" b="7171">T.;</wd>

<space/>

<wd l="8981" t="6998" r="9610" b="7171">Aluísio,</wd>

<space/>

<wd l="9686" t="6998" r="9826" b="7142">S.</wd>

<space/>

<wd l="9907" t="6998" r="10474" b="7181">(2014).</wd>

<space/>

</ln>

<ln l="6350" t="7229" r="10483" b="7411" baseLine="7363" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6350" t="7229" r="6494" b="7368">A</wd>

<space/>

<wd l="6562" t="7234" r="7027" b="7411">Large</wd>

<space/>

<wd l="7099" t="7229" r="7752" b="7411">Opinion</wd>

<space/>

<wd l="7824" t="7229" r="8390" b="7411">Corpus</wd>

<space/>

<wd l="8467" t="7229" r="8621" b="7368">in</wd>

<space/>

<wd l="8683" t="7234" r="9566" b="7411">Portuguese</wd>

<space/>

<wd l="9648" t="7315" r="9706" b="7334">-</wd>

<space/>

<wd l="9782" t="7229" r="10483" b="7411">Tackling</wd>

<space/>

</ln>

<ln l="6355" t="7459" r="10488" b="7642" baseLine="7594">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="6355" t="7459" r="7930" b="7642">Out-Of-Vocabulary</wd>

<space/>

<wd l="7997" t="7459" r="8568" b="7603">Words.</wd>

<space/>

<wd l="8659" t="7464" r="8861" b="7603">In:</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="8942" t="7459" r="9946" b="7642">Proceedings</wd>

<space/>

<wd l="10022" t="7459" r="10214" b="7642">of</wd>

<space/>

<wd l="10248" t="7459" r="10488" b="7603">the</wd>

<space/>

</run>

</ln>

<ln l="6341" t="7690" r="10483" b="7872" baseLine="7824" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6341" t="7690" r="6792" b="7862">Ninth</wd>

<space/>

<wd l="6965" t="7690" r="8035" b="7834">International</wd>

<space/>

<wd l="8218" t="7690" r="9125" b="7872">Conference</wd>

<space/>

<wd l="9317" t="7738" r="9504" b="7834">on</wd>

<space/>

<wd l="9677" t="7694" r="10483" b="7872">Language</wd>

<space/>

</ln>

<ln l="6346" t="7920" r="9634" b="8102" baseLine="8054">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="6346" t="7925" r="7171" b="8064">Resources</wd>

<space/>

<wd l="7224" t="7920" r="7531" b="8064">and</wd>

<space/>

<wd l="7570" t="7920" r="8448" b="8064">Evaluation</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="8506" t="7920" r="9072" b="8102">(LREC</wd>

<space/>

<wd l="9125" t="7920" r="9634" b="8102">2014).</wd>

</run>

</ln>

</para>

<para l="6125" t="8270" r="10512" b="8909" alignment="justified" li="216" spaceBefore="113" fli="-216" lsp="exactly" lspExact="233" language="en">

<ln l="6125" t="8270" r="10512" b="8453" baseLine="8405" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="8270" r="7142" b="8443">Levenshtein,</wd>

<space/>

<wd l="7248" t="8275" r="7430" b="8414">V.</wd>

<space/>

<wd l="7550" t="8275" r="7651" b="8414">I.</wd>

<space/>

<wd l="7771" t="8270" r="8299" b="8453">(1966)</wd>

<space/>

<wd l="8400" t="8270" r="8947" b="8453">Binary</wd>

<space/>

<wd l="9048" t="8270" r="9494" b="8414">codes</wd>

<space/>

<wd l="9610" t="8270" r="10214" b="8453">capable</wd>

<space/>

<wd l="10325" t="8270" r="10512" b="8414">of</wd>

<space/>

</ln>

<ln l="6355" t="8501" r="10498" b="8683" baseLine="8635">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6355" t="8501" r="7157" b="8683">correcting</wd>

<space/>

<wd l="7200" t="8501" r="7958" b="8674">deletions,</wd>

<space/>

<wd l="8011" t="8501" r="8827" b="8674">insertions,</wd>

<space/>

<wd l="8880" t="8501" r="9163" b="8645">and</wd>

<space/>

<wd l="9202" t="8501" r="9946" b="8645">reversals.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="9998" t="8501" r="10498" b="8645">Soviet</wd>

<space/>

</run>

</ln>

<ln l="6346" t="8726" r="8232" b="8909" baseLine="8866">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6346" t="8726" r="6960" b="8909">Physics</wd>

<space/>

</run>

<wd l="6998" t="8726" r="7733" b="8909"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Doklady</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="7810" t="8726" r="8232" b="8870">1966.</wd>

</run>

</ln>

</para>

<para l="6125" t="9077" r="10493" b="10872" alignment="justified" li="216" spaceBefore="100" fli="-216" lsp="exactly" lspExact="232" language="en">

<ln l="6125" t="9077" r="10474" b="9259" baseLine="9216" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="9077" r="6442" b="9250">Liu,</wd>

<space/>

<wd l="6504" t="9082" r="6706" b="9250">F.;</wd>

<space/>

<wd l="6773" t="9082" r="7296" b="9259">Weng,</wd>

<space/>

<wd l="7358" t="9082" r="7560" b="9250">F.;</wd>

<space/>

<wd l="7627" t="9082" r="8146" b="9259">Wang,</wd>

<space/>

<wd l="8208" t="9082" r="8438" b="9250">B.;</wd>

<space/>

<wd l="8506" t="9077" r="8822" b="9250">Liu,</wd>

<space/>

<wd l="8885" t="9082" r="9067" b="9221">Y.</wd>

<space/>

<wd l="9139" t="9077" r="9667" b="9259">(2011)</wd>

<space/>

<wd l="9730" t="9077" r="10474" b="9250">Insertion,</wd>

<space/>

</ln>

<ln l="6350" t="9307" r="10488" b="9490" baseLine="9446" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="9307" r="7080" b="9480">Deletion,</wd>

<space/>

<wd l="7291" t="9350" r="7459" b="9451">or</wd>

<space/>

<wd l="7661" t="9307" r="8702" b="9451">Substitution?</wd>

<space/>

<wd l="8904" t="9307" r="9917" b="9490">Normalizing</wd>

<space/>

<wd l="10123" t="9312" r="10488" b="9451">Text</wd>

<space/>

</ln>

<ln l="6350" t="9538" r="10493" b="9720" baseLine="9677" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="9542" r="7118" b="9720">Messages</wd>

<space/>

<wd l="7469" t="9538" r="8078" b="9682">without</wd>

<space/>

<wd l="8414" t="9538" r="9883" b="9720">Pre-categorization</wd>

<space/>

<wd l="10224" t="9581" r="10493" b="9682">nor</wd>

<space/>

</ln>

<ln l="6360" t="9768" r="10493" b="9950" baseLine="9907">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6360" t="9768" r="7339" b="9950">Supervision.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7512" t="9768" r="8515" b="9950">Proceedings</wd>

<space/>

<wd l="8678" t="9768" r="8870" b="9950">of</wd>

<space/>

<wd l="8995" t="9768" r="9235" b="9912">the</wd>

<space/>

<wd l="9403" t="9768" r="9749" b="9912">49th</wd>

<space/>

<wd l="9902" t="9768" r="10493" b="9912">Annual</wd>

<space/>

</run>

</ln>

<ln l="6341" t="9998" r="10493" b="10181" baseLine="10133" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6341" t="10003" r="7013" b="10181">Meeting</wd>

<space/>

<wd l="7152" t="9998" r="7344" b="10181">of</wd>

<space/>

<wd l="7454" t="9998" r="7690" b="10142">the</wd>

<space/>

<wd l="7829" t="9998" r="8765" b="10142">Association</wd>

<space/>

<wd l="8875" t="9998" r="9149" b="10181">for</wd>

<space/>

<wd l="9298" t="9998" r="10493" b="10181">Computational</wd>

<space/>

</ln>

<ln l="6341" t="10229" r="10483" b="10411" baseLine="10363">

<wd l="6341" t="10229" r="8246" b="10411"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Linguistics</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">:shortpapers,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="8419" t="10272" r="8875" b="10411">pages</wd>

<space/>

</run>

<wd l="9053" t="10229" r="9590" b="10402"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">71</run>

<run underlined="none" subsuperscript="none" fontSize="1050" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">–</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">76,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="9763" t="10229" r="10483" b="10402">Portland,</wd>

<space/>

</run>

</ln>

<ln l="6355" t="10459" r="10493" b="10642" baseLine="10594" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="10459" r="6994" b="10642">Oregon,</wd>

<space/>

<wd l="7195" t="10464" r="7555" b="10603">June</wd>

<space/>

<wd l="7776" t="10459" r="8270" b="10632">19-24,</wd>

<space/>

<wd l="8472" t="10459" r="8909" b="10603">2011.</wd>

<space/>

<wd l="9115" t="10459" r="10056" b="10603">Association</wd>

<space/>

<wd l="10258" t="10459" r="10493" b="10603">for</wd>

<space/>

</ln>

<ln l="6355" t="10690" r="8515" b="10872" baseLine="10824" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="10690" r="7531" b="10872">Computational</wd>

<space/>

<wd l="7589" t="10690" r="8515" b="10872">Linguistics.</wd>

</ln>

</para>

<para l="6125" t="11040" r="10526" b="12139" alignment="justified" li="216" spaceBefore="114" fli="-216" lsp="exactly" lspExact="232" language="en">

<ln l="6125" t="11040" r="10474" b="11222" baseLine="11174" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="11045" r="6965" b="11222">Mosquera,</wd>

<space/>

<wd l="7042" t="11040" r="7824" b="11213">A.;Lloret,</wd>

<space/>

<wd l="7901" t="11045" r="8117" b="11213">E.;</wd>

<space/>

<wd l="8203" t="11040" r="8866" b="11213">Moreda,</wd>

<space/>

<wd l="8947" t="11045" r="9096" b="11184">P.</wd>

<space/>

<wd l="9187" t="11040" r="9710" b="11222">(2012)</wd>

<space/>

<wd l="9787" t="11040" r="10474" b="11184">Towards</wd>

<space/>

</ln>

<ln l="6350" t="11270" r="10483" b="11453" baseLine="11405" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="11270" r="7258" b="11453">Facilitating</wd>

<space/>

<wd l="7378" t="11270" r="7618" b="11414">the</wd>

<space/>

<wd l="7738" t="11270" r="8784" b="11453">Accessibility</wd>

<space/>

<wd l="8904" t="11270" r="9091" b="11414">of</wd>

<space/>

<wd l="9187" t="11270" r="9562" b="11414">Web</wd>

<space/>

<wd l="9682" t="11270" r="9926" b="11414">2.0</wd>

<space/>

<wd l="10046" t="11275" r="10483" b="11414">Texts</wd>

<space/>

</ln>

<ln l="6350" t="11496" r="10526" b="11678" baseLine="11635">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6350" t="11496" r="6970" b="11678">through</wd>

<space/>

<wd l="7176" t="11501" r="7541" b="11640">Text</wd>

<space/>

<wd l="7733" t="11496" r="8918" b="11640">Normalisation.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="9130" t="11496" r="10133" b="11678">Proceedings</wd>

<space/>

<wd l="10334" t="11496" r="10526" b="11678">of</wd>

<space/>

</run>

</ln>

<ln l="6341" t="11726" r="10493" b="11909" baseLine="11866" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6341" t="11726" r="6979" b="11899">Natural</wd>

<space/>

<wd l="7152" t="11731" r="7963" b="11909">Language</wd>

<space/>

<wd l="8150" t="11731" r="9048" b="11909">Processing</wd>

<space/>

<wd l="9197" t="11726" r="9470" b="11909">for</wd>

<space/>

<wd l="9648" t="11731" r="10493" b="11909">Improving</wd>

<space/>

</ln>

<ln l="6360" t="11957" r="9926" b="12139" baseLine="12096">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6360" t="11957" r="6960" b="12101">Textual</wd>

<space/>

<wd l="6989" t="11957" r="8011" b="12139">Accessibility</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="8064" t="11957" r="8995" b="12139">(NLP4ITA)</wd>

<space/>

<wd l="9053" t="11957" r="9926" b="12139">Workshop.</wd>

</run>

</ln>

</para>

<para l="6125" t="12307" r="10498" b="13603" alignment="justified" li="216" spaceBefore="104" fli="-216" lsp="exactly" lspExact="232" language="en">

<ln l="6125" t="12307" r="10488" b="12490" baseLine="12446" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6125" t="12307" r="6686" b="12480">Muniz,</wd>

<space/>

<wd l="6778" t="12307" r="7454" b="12480">M.C.M.;</wd>

<space/>

<wd l="7546" t="12312" r="8102" b="12480">Nunes,</wd>

<space/>

<wd l="8194" t="12307" r="8851" b="12480">M.G.V.;</wd>

<space/>

<wd l="8947" t="12312" r="9605" b="12490">Laporte.</wd>

<space/>

<wd l="9701" t="12312" r="9864" b="12451">E.</wd>

<space/>

<wd l="9960" t="12307" r="10488" b="12490">(2005)</wd>

<space/>

</ln>

<ln l="6360" t="12538" r="10478" b="12720" baseLine="12677" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6360" t="12538" r="7536" b="12710">&quot;UNITEX-PB,</wd>

<space/>

<wd l="7608" t="12581" r="7694" b="12682">a</wd>

<space/>

<wd l="7766" t="12557" r="7978" b="12682">set</wd>

<space/>

<wd l="8045" t="12538" r="8232" b="12682">of</wd>

<space/>

<wd l="8275" t="12538" r="8880" b="12682">flexible</wd>

<space/>

<wd l="8952" t="12538" r="9662" b="12720">language</wd>

<space/>

<wd l="9730" t="12581" r="10478" b="12682">resources</wd>

<space/>

</ln>

<ln l="6355" t="12768" r="10488" b="12950" baseLine="12907">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6355" t="12768" r="6590" b="12912">for</wd>

<space/>

<wd l="6734" t="12768" r="7469" b="12912">Brazilian</wd>

<space/>

<wd l="7618" t="12768" r="8630" b="12950">Portuguese&quot;,</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="8784" t="12768" r="9787" b="12950">Proceedings</wd>

<space/>

<wd l="9946" t="12768" r="10138" b="12950">of</wd>

<space/>

<wd l="10253" t="12768" r="10488" b="12912">the</wd>

<space/>

</run>

</ln>

<ln l="6374" t="12998" r="10498" b="13181" baseLine="13133" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6374" t="12998" r="7157" b="13181">Workshop</wd>

<space/>

<wd l="7320" t="13046" r="7512" b="13142">on</wd>

<space/>

<wd l="7685" t="12998" r="8611" b="13181">Technology</wd>

<space/>

<wd l="8770" t="12998" r="8957" b="13181">of</wd>

<space/>

<wd l="9062" t="12998" r="10027" b="13181">Information</wd>

<space/>

<wd l="10190" t="12998" r="10498" b="13142">and</wd>

<space/>

</ln>

<ln l="6341" t="13229" r="10474" b="13411" baseLine="13363">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6341" t="13234" r="6936" b="13373">Human</wd>

<space/>

<wd l="7032" t="13234" r="7843" b="13411">Language</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7954" t="13229" r="8434" b="13411">(TIL),</wd>

<space/>

<wd l="8549" t="13229" r="8837" b="13373">São</wd>

<space/>

<wd l="8942" t="13229" r="9706" b="13411">Leopoldo</wd>

<space/>

<wd l="9816" t="13229" r="10474" b="13411">(Brazil):</wd>

<space/>

</run>

</ln>

<ln l="6350" t="13459" r="7099" b="13603" baseLine="13594" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="13459" r="7099" b="13603">Unisinos.</wd>

</ln>

</para>

<para l="6120" t="13810" r="10498" b="15370" alignment="justified" li="216" spaceBefore="108" fli="-216" lsp="exactly" lspExact="230" language="en">

<ln l="6120" t="13810" r="10474" b="13992" baseLine="13944" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6120" t="13814" r="6677" b="13982">Nunes,</wd>

<space/>

<wd l="6821" t="13810" r="7478" b="13982">M.G.V.;</wd>

<space/>

<wd l="7632" t="13810" r="8174" b="13982">Vieira,</wd>

<space/>

<wd l="8318" t="13814" r="8467" b="13954">F.</wd>

<space/>

<wd l="8621" t="13814" r="8837" b="13954">M.</wd>

<space/>

<wd l="8990" t="13810" r="9211" b="13982">C.;</wd>

<space/>

<wd l="9365" t="13810" r="10099" b="13992">Zavaglia,</wd>

<space/>

<wd l="10253" t="13810" r="10474" b="13982">C.;</wd>

<space/>

</ln>

<ln l="6360" t="14040" r="10493" b="14222" baseLine="14174" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6360" t="14040" r="7157" b="14213">Sossolote,</wd>

<space/>

<wd l="7301" t="14040" r="7464" b="14184">C.</wd>

<space/>

<wd l="7608" t="14045" r="7776" b="14184">R.</wd>

<space/>

<wd l="7925" t="14040" r="8146" b="14213">C.;</wd>

<space/>

<wd l="8285" t="14040" r="9197" b="14213">Hernandez,</wd>

<space/>

<wd l="9336" t="14045" r="9451" b="14184">J.</wd>

<space/>

<wd l="9600" t="14040" r="10123" b="14222">(1996)</wd>

<space/>

<wd l="10262" t="14040" r="10493" b="14222">(In</wd>

<space/>

</ln>

<ln l="6350" t="14270" r="10478" b="14453" baseLine="14405" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="14270" r="7296" b="14453">Portuguese)</wd>

<space/>

<wd l="7373" t="14270" r="7680" b="14414">The</wd>

<space/>

<wd l="7752" t="14270" r="8275" b="14453">design</wd>

<space/>

<wd l="8342" t="14270" r="8530" b="14414">of</wd>

<space/>

<wd l="8573" t="14314" r="8659" b="14414">a</wd>

<space/>

<wd l="8726" t="14270" r="9384" b="14414">Lexicon</wd>

<space/>

<wd l="9456" t="14270" r="9686" b="14414">for</wd>

<space/>

<wd l="9749" t="14270" r="10478" b="14414">Brazilian</wd>

<space/>

</ln>

<ln l="6350" t="14496" r="10474" b="14678" baseLine="14635" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="14501" r="7282" b="14678">Portuguese:</wd>

<space/>

<wd l="7450" t="14501" r="8088" b="14640">Lessons</wd>

<space/>

<wd l="8251" t="14496" r="8837" b="14640">learned</wd>

<space/>

<wd l="9000" t="14496" r="9283" b="14640">and</wd>

<space/>

<wd l="9437" t="14496" r="10474" b="14678">Perspectives.</wd>

<space/>

</ln>

<ln l="6346" t="14726" r="10498" b="14909" baseLine="14866" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6346" t="14726" r="7349" b="14909">Proceedings</wd>

<space/>

<wd l="7421" t="14726" r="7613" b="14909">of</wd>

<space/>

<wd l="7642" t="14726" r="7882" b="14870">the</wd>

<space/>

<wd l="7939" t="14731" r="8098" b="14866">II</wd>

<space/>

<wd l="8174" t="14726" r="8957" b="14909">Workshop</wd>

<space/>

<wd l="9029" t="14774" r="9221" b="14870">on</wd>

<space/>

<wd l="9298" t="14726" r="10498" b="14909">Computational</wd>

<space/>

</ln>

<ln l="6346" t="14957" r="10478" b="15139" baseLine="15096">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6346" t="14962" r="7243" b="15139">Processing</wd>

<space/>

<wd l="7392" t="14957" r="7584" b="15139">of</wd>

<space/>

<wd l="7718" t="14962" r="8290" b="15101">Written</wd>

<space/>

<wd l="8448" t="14957" r="8755" b="15101">and</wd>

<space/>

<wd l="8899" t="14957" r="9389" b="15139">Speak</wd>

<space/>

</run>

<wd l="9523" t="14962" r="10478" b="15139"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Portuguese</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

</ln>

<ln l="6355" t="15187" r="10099" b="15370" baseLine="15326" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="15187" r="7315" b="15360">CEFET-PR,</wd>

<space/>

<wd l="7378" t="15187" r="8069" b="15360">Curitiba,</wd>

<space/>

<wd l="8131" t="15187" r="8779" b="15331">October</wd>

<space/>

<wd l="8822" t="15187" r="9336" b="15360">23-25,</wd>

<space/>

<wd l="9389" t="15230" r="9528" b="15370">p.</wd>

<space/>

<wd l="9595" t="15187" r="10099" b="15331">61-70.</wd>

</ln>

</para>

</column>

</section>

<dd l="1409" t="15736" r="10531" b="15977">

<para l="5800" t="15787" r="6148" b="15946" alignment="centered" spaceBefore="4" lsp="exactly" lspExact="229" language="en">

<ln l="5866" t="15787" r="6082" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="43">

<wd l="5866" t="15787" r="6082" b="15946">46</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4305.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1421" marginTop="1400" marginRight="6108" marginBottom="858" offsetX="8" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1421" t="1400" r="5801" b="15980">

<column l="1421" t="1400" r="5801" b="15980">

<para l="1421" t="1459" r="5794" b="2333" alignment="justified" li="216" spaceBefore="18" fli="-216" lsp="exactly" lspExact="230" language="en">

<ln l="1421" t="1459" r="5774" b="1637" baseLine="1594" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="1459" r="2434" b="1637">Ratnaparkhi,</wd>

<space/>

<wd l="2525" t="1459" r="2707" b="1603">A.</wd>

<space/>

<wd l="2808" t="1459" r="3379" b="1637">(1996).</wd>

<space/>

<wd l="3470" t="1459" r="3614" b="1598">A</wd>

<space/>

<wd l="3696" t="1459" r="4507" b="1603">maximum</wd>

<space/>

<wd l="4589" t="1478" r="5198" b="1637">entropy</wd>

<space/>

<wd l="5280" t="1459" r="5774" b="1603">model</wd>

<space/>

</ln>

<ln l="1651" t="1690" r="5779" b="1867" baseLine="1824">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1651" t="1690" r="1882" b="1834">for</wd>

<space/>

<wd l="1954" t="1690" r="3115" b="1867">part-of-speech</wd>

<space/>

<wd l="3192" t="1690" r="3830" b="1867">tagging.</wd>

<space/>

<wd l="3926" t="1694" r="4133" b="1834">In:</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4219" t="1690" r="5222" b="1867">Proceedings</wd>

<space/>

<wd l="5309" t="1690" r="5501" b="1867">of</wd>

<space/>

<wd l="5544" t="1690" r="5779" b="1834">the</wd>

<space/>

</run>

</ln>

<ln l="1651" t="1920" r="5794" b="2098" baseLine="2054" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="1920" r="2520" b="2098">conference</wd>

<space/>

<wd l="2707" t="1968" r="2894" b="2064">on</wd>

<space/>

<wd l="3082" t="1920" r="3850" b="2098">empirical</wd>

<space/>

<wd l="4018" t="1920" r="4685" b="2064">methods</wd>

<space/>

<wd l="4867" t="1930" r="5016" b="2064">in</wd>

<space/>

<wd l="5194" t="1920" r="5794" b="2064">natural</wd>

<space/>

</ln>

<ln l="1651" t="2150" r="5098" b="2333" baseLine="2285">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1651" t="2150" r="2390" b="2333">language</wd>

<space/>

<wd l="2419" t="2160" r="3312" b="2333">processing</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3365" t="2150" r="3763" b="2333">(Vol.</wd>

<space/>

<wd l="3845" t="2150" r="3970" b="2323">1,</wd>

<space/>

<wd l="4018" t="2194" r="4262" b="2333">pp.</wd>

<space/>

<wd l="4344" t="2150" r="5098" b="2333">133-142).</wd>

</run>

</ln>

</para>

<para l="1421" t="2501" r="5784" b="3370" alignment="justified" li="216" spaceBefore="117" fli="-216" lsp="exactly" lspExact="230" language="en">

<ln l="1421" t="2501" r="5770" b="2683" baseLine="2635" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1421" t="2501" r="2390" b="2683">Ringlstetter,</wd>

<space/>

<wd l="2515" t="2501" r="2736" b="2674">C.;</wd>

<space/>

<wd l="2870" t="2501" r="3446" b="2674">Schulz,</wd>

<space/>

<wd l="3566" t="2506" r="3744" b="2645">K.</wd>

<space/>

<wd l="3869" t="2506" r="4109" b="2674">U.;</wd>

<space/>

<wd l="4229" t="2501" r="4805" b="2674">Mihov,</wd>

<space/>

<wd l="4934" t="2501" r="5074" b="2645">S.</wd>

<space/>

<wd l="5198" t="2501" r="5770" b="2683">(2006).</wd>

<space/>

</ln>

<ln l="1651" t="2731" r="5784" b="2914" baseLine="2866" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="2731" r="2707" b="2914">Orthographic</wd>

<space/>

<wd l="2755" t="2736" r="3250" b="2875">Errors</wd>

<space/>

<wd l="3307" t="2731" r="3456" b="2870">in</wd>

<space/>

<wd l="3499" t="2731" r="3874" b="2875">Web</wd>

<space/>

<wd l="3926" t="2736" r="4435" b="2914">Pages:</wd>

<space/>

<wd l="4493" t="2731" r="5112" b="2875">Toward</wd>

<space/>

<wd l="5165" t="2731" r="5784" b="2875">Cleaner</wd>

<space/>

</ln>

<ln l="1646" t="2957" r="5784" b="3139" baseLine="3096">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1646" t="2957" r="2021" b="3101">Web</wd>

<space/>

<wd l="2218" t="2957" r="2909" b="3139">Corpora.</wd>

<space/>

<wd l="3110" t="2962" r="3317" b="3101">In:</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3528" t="2957" r="4728" b="3139">Computational</wd>

<space/>

<wd l="4901" t="2962" r="5784" b="3139">Linguistics</wd>

<space/>

</run>

</ln>

<ln l="1646" t="3187" r="4454" b="3370" baseLine="3326" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="3187" r="2285" b="3331">Volume</wd>

<space/>

<wd l="2342" t="3187" r="2582" b="3360">32,</wd>

<space/>

<wd l="2635" t="3187" r="3298" b="3331">Number</wd>

<space/>

<wd l="3350" t="3187" r="3490" b="3360">3,</wd>

<space/>

<wd l="3538" t="3230" r="3682" b="3370">p.</wd>

<space/>

<wd l="3744" t="3187" r="4454" b="3331">295-340.</wd>

</ln>

</para>

<para l="1430" t="3538" r="5779" b="4411" alignment="justified" li="216" spaceBefore="117" fli="-216" lsp="exactly" lspExact="230" language="en">

<ln l="1430" t="3538" r="5774" b="3720" baseLine="3677" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="3538" r="2160" b="3720">Schlippe,</wd>

<space/>

<wd l="2222" t="3542" r="2434" b="3710">T.;</wd>

<space/>

<wd l="2496" t="3538" r="2856" b="3710">Zhu,</wd>

<space/>

<wd l="2918" t="3538" r="3139" b="3710">C.;</wd>

<space/>

<wd l="3206" t="3538" r="3989" b="3710">Gebhardt,</wd>

<space/>

<wd l="4046" t="3542" r="4219" b="3710">J.;</wd>

<space/>

<wd l="4291" t="3538" r="4920" b="3710">Schultz,</wd>

<space/>

<wd l="4982" t="3542" r="5141" b="3682">T.</wd>

<space/>

<wd l="5203" t="3538" r="5774" b="3720">(2010).</wd>

<space/>

</ln>

<ln l="1651" t="3768" r="5774" b="3912" baseLine="3907" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="3773" r="2016" b="3912">Text</wd>

<space/>

<wd l="2126" t="3768" r="3235" b="3912">normalization</wd>

<space/>

<wd l="3346" t="3768" r="3806" b="3912">based</wd>

<space/>

<wd l="3926" t="3811" r="4123" b="3912">on</wd>

<space/>

<wd l="4243" t="3768" r="4982" b="3912">statistical</wd>

<space/>

<wd l="5102" t="3768" r="5774" b="3912">machine</wd>

<space/>

</ln>

<ln l="1646" t="3998" r="5779" b="4181" baseLine="4138">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1646" t="3998" r="2491" b="4142">translation</wd>

<space/>

<wd l="2578" t="3998" r="2861" b="4142">and</wd>

<space/>

<wd l="2952" t="3998" r="3562" b="4142">internet</wd>

<space/>

<wd l="3643" t="4042" r="3984" b="4142">user</wd>

<space/>

<wd l="4075" t="4018" r="4704" b="4181">support.</wd>

<space/>

</run>

<wd l="4795" t="3998" r="5779" b="4181"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Interspeech</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

</ln>

<ln l="1646" t="4229" r="3350" b="4411" baseLine="4363" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="4229" r="2093" b="4402">2010,</wd>

<space/>

<wd l="2141" t="4272" r="2386" b="4411">pp.</wd>

<space/>

<wd l="2467" t="4229" r="3350" b="4373">1816-1819.</wd>

</ln>

</para>

<para l="1430" t="4579" r="5789" b="5683" alignment="justified" li="216" spaceBefore="126" spaceAfter="10281" fli="-216" lsp="exactly" lspExact="230" language="en">

<ln l="1430" t="4579" r="5770" b="4762" baseLine="4714" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1430" t="4579" r="2117" b="4762">Søgaard,</wd>

<space/>

<wd l="2203" t="4579" r="2438" b="4752">A.,</wd>

<space/>

<wd l="2530" t="4579" r="3403" b="4752">Johannsen,</wd>

<space/>

<wd l="3494" t="4579" r="3730" b="4752">A.,</wd>

<space/>

<wd l="3821" t="4579" r="4315" b="4752">Plank,</wd>

<space/>

<wd l="4406" t="4584" r="4632" b="4752">B.,</wd>

<space/>

<wd l="4723" t="4584" r="5208" b="4762">Hovy,</wd>

<space/>

<wd l="5294" t="4584" r="5530" b="4752">D.,</wd>

<space/>

<wd l="5626" t="4579" r="5770" b="4723">&amp;</wd>

<space/>

</ln>

<ln l="1646" t="4810" r="5774" b="4992" baseLine="4944">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1646" t="4810" r="2410" b="4982">Martinez,</wd>

<space/>

<wd l="2486" t="4814" r="2669" b="4954">H.</wd>

<space/>

<wd l="2755" t="4810" r="3317" b="4992">(2014).</wd>

<space/>

<wd l="3398" t="4810" r="3965" b="4954">What’s</wd>

<space/>

<wd l="4046" t="4810" r="4200" b="4949">in</wd>

<space/>

<wd l="4272" t="4853" r="4358" b="4954">a</wd>

<space/>

</run>

<wd l="4421" t="4810" r="5021" b="4992"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">p</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">-value</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="5098" t="4810" r="5251" b="4949">in</wd>

<space/>

<wd l="5314" t="4810" r="5774" b="4954">NLP?</wd>

<space/>

</run>

</ln>

<ln l="1651" t="5040" r="5779" b="5222" baseLine="5174">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1651" t="5045" r="1853" b="5184">In:</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1963" t="5040" r="2966" b="5222">Proceedings</wd>

<space/>

<wd l="3072" t="5040" r="3264" b="5222">of</wd>

<space/>

<wd l="3331" t="5040" r="3566" b="5184">the</wd>

<space/>

<wd l="3677" t="5040" r="4502" b="5222">eighteenth</wd>

<space/>

<wd l="4613" t="5040" r="5482" b="5222">conference</wd>

<space/>

<wd l="5587" t="5088" r="5779" b="5184">on</wd>

<space/>

</run>

</ln>

<ln l="1651" t="5270" r="5789" b="5453" baseLine="5405" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="5270" r="2808" b="5453">computational</wd>

<space/>

<wd l="3125" t="5270" r="3725" b="5414">natural</wd>

<space/>

<wd l="4046" t="5270" r="4781" b="5453">language</wd>

<space/>

<wd l="5112" t="5270" r="5789" b="5453">learning</wd>

<space/>

</ln>

<ln l="1651" t="5501" r="3518" b="5683" baseLine="5635">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1651" t="5501" r="2755" b="5683">(CONLL’14),</wd>

<space/>

<wd l="2808" t="5544" r="3053" b="5683">pp.</wd>

<space/>

</run>

<wd l="3130" t="5501" r="3518" b="5645"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">1</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">-10.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

</column>

</section>

<dd l="5801" t="15736" r="6176" b="15980">

<para l="5801" t="15792" r="6143" b="15946" alignment="left" spaceBefore="4" lsp="exactly" lspExact="229" language="en">

<ln l="5866" t="15792" r="6077" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="17">

<wd l="5866" t="15792" r="6077" b="15946">47</wd>

</ln>

</para>

</dd>

</body>

</page>

</document>

