<?xml version="1.0" encoding="UTF-16"?>

<!--XML document generated using OCR technology from Nuance Communications, Inc.-->

<document xmlns="http://www.scansoft.com/omnipage/xml/ssdoc-schema3.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4306.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1440" marginTop="1400" marginRight="1378" marginBottom="358" offsetX="-30" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1440" t="1400" r="10531" b="2296">

<column l="1440" t="1400" r="10531" b="2296">

<para l="2093" t="1478" r="9864" b="1742" alignment="centered" spaceBefore="22" spaceAfter="535" lsp="exactly" lspExact="332" language="en">

<ln l="2093" t="1478" r="9864" b="1742" baseLine="1675" bold="true" underlined="none" subsuperscript="none" fontSize="1450" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="2093" t="1478" r="2914" b="1690">USFD:</wd>

<space/>

<wd l="3014" t="1478" r="3912" b="1685">Twitter</wd>

<space/>

<wd l="3984" t="1483" r="4589" b="1690">NER</wd>

<space/>

<wd l="4666" t="1478" r="5198" b="1685">with</wd>

<space/>

<wd l="5275" t="1478" r="5880" b="1685">Drift</wd>

<space/>

<wd l="5962" t="1478" r="7723" b="1742">Compensation</wd>

<space/>

<wd l="7800" t="1483" r="8251" b="1685">and</wd>

<space/>

<wd l="8333" t="1478" r="9197" b="1685">Linked</wd>

<space/>

<wd l="9274" t="1483" r="9864" b="1685">Data</wd>

</ln>

</para>

</column>

</section>

<section l="1668" t="2296" r="10294" b="3126">

<column l="1668" t="2296" r="4318" b="3126">

<para l="1670" t="2347" r="4310" b="2846" alignment="centered" lsp="exactly" lspExact="275" language="en">

<ln l="2131" t="2347" r="3845" b="2568" baseLine="2510" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="2131" t="2352" r="2650" b="2520">Leon</wd>

<space/>

<wd l="2712" t="2347" r="3845" b="2568">Derczynski
</wd>

</ln>

<ln l="1670" t="2630" r="4310" b="2846" baseLine="2794" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="1670" t="2630" r="2664" b="2846">University</wd>

<space/>

<wd l="2736" t="2630" r="2947" b="2798">of</wd>

<space/>

<wd l="3000" t="2630" r="3893" b="2827">Sheffield,</wd>

<space/>

<wd l="3965" t="2635" r="4310" b="2798">UK</wd>

</ln>

</para>

<para l="1958" t="2957" r="3984" b="3086" alignment="centered" spaceBefore="86" lsp="exactly" lspExact="192" language="en">

<ln l="1958" t="2957" r="3984" b="3086" baseLine="3072" underlined="none" subsuperscript="none" fontSize="900" fontFace="Courier New" fontFamily="modern" fontPitch="fixed" spacing="-1">

<wd l="1958" t="2957" r="3984" b="3086">leon@dcs.shef.ac.uk</wd>

</ln>

</para>

</column>

<column l="4656" t="2296" r="7306" b="3126">

<para l="4656" t="2347" r="7301" b="2846" alignment="centered" lsp="exactly" lspExact="275" language="en">

<ln l="4997" t="2347" r="6960" b="2568" baseLine="2510" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="4997" t="2352" r="5774" b="2520">Isabelle</wd>

<space/>

<wd l="5837" t="2347" r="6960" b="2568">Augenstein
</wd>

</ln>

<ln l="4656" t="2626" r="7301" b="2846" baseLine="2794" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="4656" t="2630" r="5654" b="2846">University</wd>

<space/>

<wd l="5722" t="2630" r="5933" b="2798">of</wd>

<space/>

<wd l="5986" t="2630" r="6883" b="2827">Sheffield,</wd>

<space/>

<wd l="6955" t="2635" r="7301" b="2798">UK</wd>

</ln>

</para>

<para l="4733" t="2957" r="7186" b="3086" alignment="left" li="72" spaceBefore="86" lsp="exactly" lspExact="192" language="en">

<ln l="4733" t="2957" r="7186" b="3086" baseLine="3072" underlined="none" subsuperscript="none" fontSize="900" fontFace="Courier New" fontFamily="modern" fontPitch="fixed" spacing="-1">

<wd l="4733" t="2957" r="7186" b="3086">isabelle@dcs.shef.ac.uk</wd>

</ln>

</para>

</column>

<column l="7644" t="2296" r="10294" b="3126">

<para l="7646" t="2347" r="10286" b="2846" alignment="centered" lsp="exactly" lspExact="275" language="en">

<ln l="8059" t="2347" r="9878" b="2520" baseLine="2510" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="8059" t="2347" r="8746" b="2520">Kalina</wd>

<space/>

<wd l="8808" t="2352" r="9878" b="2520">Bontcheva
</wd>

</ln>

<ln l="7646" t="2626" r="10286" b="2846" baseLine="2794" underlined="none" subsuperscript="none" fontSize="1150" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="7646" t="2630" r="8640" b="2846">University</wd>

<space/>

<wd l="8712" t="2630" r="8923" b="2798">of</wd>

<space/>

<wd l="8976" t="2630" r="9869" b="2827">Sheffield,</wd>

<space/>

<wd l="9941" t="2635" r="10286" b="2798">UK</wd>

</ln>

</para>

<para l="7824" t="2957" r="10066" b="3086" alignment="left" li="144" spaceBefore="86" lsp="exactly" lspExact="192" language="en">

<ln l="7824" t="2957" r="10066" b="3086" baseLine="3072" underlined="none" subsuperscript="none" fontSize="900" fontFace="Courier New" fontFamily="modern" fontPitch="fixed" spacing="-1">

<wd l="7824" t="2957" r="10066" b="3086">kalina@dcs.shef.ac.uk</wd>

</ln>

</para>

</column>

</section>

<section l="1440" t="4120" r="10531" b="15363">

<column l="1440" t="4120" r="5822" b="15363">

<para l="3182" t="4166" r="4070" b="4339" alignment="centered" spaceBefore="3" lsp="exactly" lspExact="279" language="en">

<ln l="3182" t="4166" r="4070" b="4339" baseLine="4334" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="3182" t="4166" r="4070" b="4339">Abstract</wd>

</ln>

</para>

<para l="1781" t="4747" r="5472" b="8002" alignment="justified" li="288" ri="360" spaceBefore="283" lsp="exactly" lspExact="240" language="en">

<ln l="1781" t="4747" r="5467" b="4934" baseLine="4886" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1781" t="4747" r="2131" b="4891">This</wd>

<space/>

<wd l="2208" t="4790" r="2654" b="4930">paper</wd>

<space/>

<wd l="2731" t="4747" r="3461" b="4891">describes</wd>

<space/>

<wd l="3542" t="4790" r="3629" b="4891">a</wd>

<space/>

<wd l="3701" t="4747" r="4070" b="4930">pilot</wd>

<space/>

<wd l="4138" t="4752" r="4536" b="4891">NER</wd>

<space/>

<wd l="4618" t="4766" r="5165" b="4934">system</wd>

<space/>

<wd l="5237" t="4747" r="5467" b="4891">for</wd>

<space/>

</ln>

<ln l="1781" t="4987" r="5458" b="5174" baseLine="5122" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1781" t="4987" r="2390" b="5155">Twitter,</wd>

<space/>

<wd l="2539" t="4987" r="3427" b="5174">comprising</wd>

<space/>

<wd l="3547" t="4987" r="3792" b="5131">the</wd>

<space/>

<wd l="3912" t="4987" r="4416" b="5131">USFD</wd>

<space/>

<wd l="4550" t="5006" r="5098" b="5174">system</wd>

<space/>

<wd l="5218" t="5030" r="5458" b="5131">en-</wd>

</ln>

<ln l="1781" t="5227" r="5458" b="5414" baseLine="5362" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1781" t="5246" r="2002" b="5414">try</wd>

<space/>

<wd l="2107" t="5246" r="2261" b="5371">to</wd>

<space/>

<wd l="2366" t="5227" r="2606" b="5371">the</wd>

<space/>

<wd l="2712" t="5232" r="3365" b="5371">W-NUT</wd>

<space/>

<wd l="3475" t="5227" r="3859" b="5371">2015</wd>

<space/>

<wd l="3970" t="5232" r="4373" b="5371">NER</wd>

<space/>

<wd l="4483" t="5227" r="4997" b="5371">shared</wd>

<space/>

<wd l="5098" t="5227" r="5458" b="5371">task.</wd>

<space/>

</ln>

<ln l="1781" t="5467" r="5472" b="5654" baseLine="5602" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1781" t="5467" r="2093" b="5611">The</wd>

<space/>

<wd l="2203" t="5467" r="2539" b="5654">goal</wd>

<space/>

<wd l="2645" t="5467" r="2770" b="5611">is</wd>

<space/>

<wd l="2880" t="5486" r="3034" b="5611">to</wd>

<space/>

<wd l="3144" t="5467" r="3845" b="5654">correctly</wd>

<space/>

<wd l="3950" t="5467" r="4334" b="5611">label</wd>

<space/>

<wd l="4445" t="5467" r="5011" b="5611">entities</wd>

<space/>

<wd l="5122" t="5467" r="5275" b="5606">in</wd>

<space/>

<wd l="5386" t="5510" r="5472" b="5611">a</wd>

<space/>

</ln>

<ln l="1781" t="5707" r="5458" b="5894" baseLine="5842" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1781" t="5726" r="2218" b="5851">tweet</wd>

<space/>

<wd l="2275" t="5707" r="2866" b="5875">dataset,</wd>

<space/>

<wd l="2933" t="5707" r="3360" b="5894">using</wd>

<space/>

<wd l="3427" t="5750" r="3605" b="5851">an</wd>

<space/>

<wd l="3667" t="5707" r="4416" b="5894">inventory</wd>

<space/>

<wd l="4478" t="5707" r="4651" b="5851">of</wd>

<space/>

<wd l="4699" t="5726" r="4944" b="5851">ten</wd>

<space/>

<wd l="4997" t="5726" r="5458" b="5894">types.</wd>

<space/>

</ln>

<ln l="1781" t="5942" r="5467" b="6130" baseLine="6082" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1781" t="5947" r="2040" b="6086">We</wd>

<space/>

<wd l="2141" t="5942" r="2731" b="6130">employ</wd>

<space/>

<wd l="2837" t="5942" r="3624" b="6086">structured</wd>

<space/>

<wd l="3715" t="5942" r="4416" b="6130">learning,</wd>

<space/>

<wd l="4531" t="5942" r="5170" b="6130">drawing</wd>

<space/>

<wd l="5275" t="5986" r="5467" b="6086">on</wd>

<space/>

</ln>

<ln l="1786" t="6182" r="5458" b="6370" baseLine="6322" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1786" t="6202" r="2573" b="6370">gazetteers</wd>

<space/>

<wd l="2626" t="6182" r="3058" b="6326">taken</wd>

<space/>

<wd l="3101" t="6182" r="3494" b="6326">from</wd>

<space/>

<wd l="3538" t="6182" r="4099" b="6326">Linked</wd>

<space/>

<wd l="4147" t="6187" r="4565" b="6350">Data,</wd>

<space/>

<wd l="4627" t="6182" r="4910" b="6326">and</wd>

<space/>

<wd l="4958" t="6226" r="5155" b="6326">on</wd>

<space/>

<wd l="5203" t="6226" r="5458" b="6326">un-</wd>

</ln>

<ln l="1790" t="6422" r="5462" b="6610" baseLine="6557" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1790" t="6422" r="2635" b="6605">supervised</wd>

<space/>

<wd l="2698" t="6422" r="3475" b="6610">clustering</wd>

<space/>

<wd l="3533" t="6422" r="4210" b="6590">features,</wd>

<space/>

<wd l="4277" t="6422" r="4560" b="6566">and</wd>

<space/>

<wd l="4622" t="6422" r="5462" b="6610">attempting</wd>

<space/>

</ln>

<ln l="1781" t="6662" r="5472" b="6850" baseLine="6797" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1781" t="6682" r="1934" b="6806">to</wd>

<space/>

<wd l="2006" t="6682" r="2942" b="6845">compensate</wd>

<space/>

<wd l="3010" t="6662" r="3240" b="6806">for</wd>

<space/>

<wd l="3317" t="6662" r="3922" b="6850">stylistic</wd>

<space/>

<wd l="3994" t="6662" r="4282" b="6806">and</wd>

<space/>

<wd l="4344" t="6662" r="4742" b="6845">topic</wd>

<space/>

<wd l="4810" t="6662" r="5150" b="6806">drift</wd>

<space/>

<wd l="5213" t="6749" r="5314" b="6763">–</wd>

<space/>

<wd l="5386" t="6706" r="5472" b="6806">a</wd>

<space/>

</ln>

<ln l="1781" t="6902" r="5472" b="7090" baseLine="7037" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1781" t="6902" r="2064" b="7090">key</wd>

<space/>

<wd l="2126" t="6902" r="2885" b="7090">challenge</wd>

<space/>

<wd l="2947" t="6902" r="3101" b="7042">in</wd>

<space/>

<wd l="3168" t="6902" r="3619" b="7046">social</wd>

<space/>

<wd l="3682" t="6902" r="4171" b="7046">media</wd>

<space/>

<wd l="4224" t="6922" r="4560" b="7046">text.</wd>

<space/>

<wd l="4661" t="6902" r="4968" b="7046">Our</wd>

<space/>

<wd l="5026" t="6902" r="5472" b="7046">result</wd>

<space/>

</ln>

<ln l="1781" t="7138" r="5467" b="7325" baseLine="7277" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1781" t="7138" r="1910" b="7282">is</wd>

<space/>

<wd l="1987" t="7138" r="2966" b="7320">competitive;</wd>

<space/>

<wd l="3048" t="7181" r="3278" b="7282">we</wd>

<space/>

<wd l="3350" t="7138" r="3955" b="7320">provide</wd>

<space/>

<wd l="4027" t="7181" r="4210" b="7282">an</wd>

<space/>

<wd l="4286" t="7138" r="4915" b="7325">analysis</wd>

<space/>

<wd l="4997" t="7138" r="5165" b="7282">of</wd>

<space/>

<wd l="5222" t="7138" r="5467" b="7282">the</wd>

<space/>

</ln>

<ln l="1786" t="7378" r="5458" b="7565" baseLine="7517" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1786" t="7397" r="2741" b="7560">components</wd>

<space/>

<wd l="2827" t="7378" r="3000" b="7522">of</wd>

<space/>

<wd l="3072" t="7421" r="3336" b="7522">our</wd>

<space/>

<wd l="3413" t="7378" r="4493" b="7565">methodology,</wd>

<space/>

<wd l="4589" t="7378" r="4872" b="7522">and</wd>

<space/>

<wd l="4954" t="7421" r="5136" b="7522">an</wd>

<space/>

<wd l="5222" t="7421" r="5458" b="7522">ex-</wd>

</ln>

<ln l="1786" t="7618" r="5472" b="7805" baseLine="7752" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1786" t="7618" r="2582" b="7762">amination</wd>

<space/>

<wd l="2654" t="7618" r="2827" b="7762">of</wd>

<space/>

<wd l="2885" t="7618" r="3130" b="7762">the</wd>

<space/>

<wd l="3202" t="7637" r="3653" b="7805">target</wd>

<space/>

<wd l="3725" t="7618" r="4277" b="7762">dataset</wd>

<space/>

<wd l="4344" t="7618" r="4502" b="7757">in</wd>

<space/>

<wd l="4570" t="7618" r="4810" b="7762">the</wd>

<space/>

<wd l="4891" t="7637" r="5472" b="7762">context</wd>

<space/>

</ln>

<ln l="1786" t="7858" r="2698" b="8002" baseLine="7992" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1786" t="7858" r="1958" b="8002">of</wd>

<space/>

<wd l="1997" t="7858" r="2280" b="8002">this</wd>

<space/>

<wd l="2338" t="7858" r="2698" b="8002">task.</wd>

</ln>

</para>

<para l="1459" t="8419" r="3096" b="8592" alignment="left" spaceBefore="326" lsp="exactly" lspExact="279" language="en">

<ln l="1459" t="8419" r="3096" b="8592" baseLine="8582" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="10">

<wd l="1459" t="8419" r="1555" b="8587">1</wd>

<space/>

<wd l="1805" t="8419" r="3096" b="8592">Introduction</wd>

</ln>

</para>

<para l="1445" t="8914" r="5813" b="11966" alignment="justified" spaceBefore="201" lsp="exactly" lspExact="240" language="en">

<ln l="1450" t="8914" r="5808" b="9101" baseLine="9048" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1450" t="8914" r="1939" b="9058">Social</wd>

<space/>

<wd l="2016" t="8914" r="2506" b="9058">media</wd>

<space/>

<wd l="2578" t="8914" r="2707" b="9058">is</wd>

<space/>

<wd l="2794" t="8957" r="2880" b="9058">a</wd>

<space/>

<wd l="2952" t="8957" r="3298" b="9101">very</wd>

<space/>

<wd l="3384" t="8914" r="4306" b="9101">challenging</wd>

<space/>

<wd l="4387" t="8957" r="4824" b="9101">genre</wd>

<space/>

<wd l="4901" t="8914" r="5136" b="9058">for</wd>

<space/>

<wd l="5213" t="8914" r="5808" b="9058">Natural</wd>

<space/>

</ln>

<ln l="1445" t="9149" r="5803" b="9336" baseLine="9288" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="9154" r="2227" b="9336">Language</wd>

<space/>

<wd l="2270" t="9149" r="3130" b="9336">Processing</wd>

<space/>

<wd l="3182" t="9149" r="3672" b="9326">(NLP)</wd>

<space/>

<wd l="3730" t="9149" r="4694" b="9336">(Derczynski</wd>

<space/>

<wd l="4742" t="9168" r="4882" b="9293">et</wd>

<space/>

<wd l="4925" t="9149" r="5155" b="9317">al.,</wd>

<space/>

<wd l="5208" t="9149" r="5803" b="9326">2013a),</wd>

<space/>

</ln>

<ln l="1445" t="9389" r="5813" b="9576" baseLine="9528" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="9389" r="2213" b="9576">providing</wd>

<space/>

<wd l="2266" t="9389" r="3278" b="9576">high-volume</wd>

<space/>

<wd l="3336" t="9389" r="4373" b="9576">linguistically</wd>

<space/>

<wd l="4430" t="9389" r="5458" b="9576">idiosyncratic</wd>

<space/>

<wd l="5510" t="9408" r="5813" b="9533">text</wd>

<space/>

</ln>

<ln l="1445" t="9629" r="5808" b="9816" baseLine="9763" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="9629" r="1752" b="9773">rich</wd>

<space/>

<wd l="1795" t="9629" r="1949" b="9768">in</wd>

<space/>

<wd l="1992" t="9629" r="2434" b="9773">latent</wd>

<space/>

<wd l="2482" t="9629" r="3072" b="9816">signals,</wd>

<space/>

<wd l="3120" t="9629" r="3360" b="9773">the</wd>

<space/>

<wd l="3408" t="9648" r="3960" b="9773">correct</wd>

<space/>

<wd l="3998" t="9629" r="5074" b="9811">interpretation</wd>

<space/>

<wd l="5117" t="9629" r="5290" b="9773">of</wd>

<space/>

<wd l="5323" t="9629" r="5808" b="9773">which</wd>

<space/>

</ln>

<ln l="1445" t="9869" r="5798" b="10051" baseLine="10003" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="9869" r="2078" b="10051">requires</wd>

<space/>

<wd l="2155" t="9869" r="2712" b="10013">diverse</wd>

<space/>

<wd l="2784" t="9869" r="3605" b="10013">contextual</wd>

<space/>

<wd l="3677" t="9869" r="3960" b="10013">and</wd>

<space/>

<wd l="4027" t="9869" r="5050" b="10013">author-based</wd>

<space/>

<wd l="5112" t="9869" r="5798" b="10013">informa-</wd>

</ln>

<ln l="1445" t="10109" r="5808" b="10296" baseLine="10243" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="10109" r="1790" b="10253">tion.</wd>

<space/>

<wd l="1978" t="10109" r="3096" b="10296">Consequently,</wd>

<space/>

<wd l="3197" t="10109" r="3480" b="10253">this</wd>

<space/>

<wd l="3571" t="10109" r="3998" b="10296">noisy</wd>

<space/>

<wd l="4094" t="10128" r="4675" b="10253">content</wd>

<space/>

<wd l="4762" t="10109" r="5342" b="10253">renders</wd>

<space/>

<wd l="5434" t="10114" r="5808" b="10253">NLP</wd>

<space/>

</ln>

<ln l="1454" t="10344" r="5803" b="10531" baseLine="10483" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1454" t="10363" r="2069" b="10531">systems</wd>

<space/>

<wd l="2126" t="10344" r="2678" b="10488">trained</wd>

<space/>

<wd l="2731" t="10387" r="2928" b="10488">on</wd>

<space/>

<wd l="2976" t="10387" r="3384" b="10488">more</wd>

<space/>

<wd l="3442" t="10344" r="4277" b="10512">consistent,</wd>

<space/>

<wd l="4334" t="10344" r="4843" b="10531">longer</wd>

<space/>

<wd l="4901" t="10344" r="5803" b="10512">documents,</wd>

<space/>

</ln>

<ln l="1454" t="10584" r="5803" b="10771" baseLine="10723" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1454" t="10584" r="1810" b="10728">such</wd>

<space/>

<wd l="1872" t="10627" r="2026" b="10728">as</wd>

<space/>

<wd l="2093" t="10584" r="2894" b="10752">newswire,</wd>

<space/>

<wd l="2962" t="10584" r="3499" b="10771">mostly</wd>

<space/>

<wd l="3562" t="10584" r="4272" b="10766">impotent</wd>

<space/>

<wd l="4339" t="10584" r="5299" b="10771">(Derczynski</wd>

<space/>

<wd l="5366" t="10603" r="5510" b="10728">et</wd>

<space/>

<wd l="5568" t="10584" r="5803" b="10752">al.,</wd>

<space/>

</ln>

<ln l="1450" t="10824" r="5808" b="11011" baseLine="10958" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1450" t="10824" r="2045" b="11002">2015b).</wd>

<space/>

<wd l="2122" t="10824" r="2861" b="11011">Suffering</wd>

<space/>

<wd l="2914" t="10824" r="3302" b="10968">from</wd>

<space/>

<wd l="3355" t="10867" r="3442" b="10968">a</wd>

<space/>

<wd l="3494" t="10824" r="4229" b="10968">sustained</wd>

<space/>

<wd l="4277" t="10824" r="4771" b="10968">dearth</wd>

<space/>

<wd l="4824" t="10824" r="4997" b="10968">of</wd>

<space/>

<wd l="5040" t="10824" r="5808" b="10968">annotated</wd>

<space/>

</ln>

<ln l="1445" t="11064" r="5813" b="11251" baseLine="11198" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="11064" r="2016" b="11208">Twitter</wd>

<space/>

<wd l="2088" t="11064" r="2760" b="11232">datasets,</wd>

<space/>

<wd l="2842" t="11064" r="2952" b="11208">it</wd>

<space/>

<wd l="3019" t="11107" r="3360" b="11251">may</wd>

<space/>

<wd l="3437" t="11064" r="3619" b="11208">be</wd>

<space/>

<wd l="3691" t="11064" r="4176" b="11208">useful</wd>

<space/>

<wd l="4253" t="11083" r="4402" b="11208">to</wd>

<space/>

<wd l="4478" t="11064" r="5352" b="11208">understand</wd>

<space/>

<wd l="5419" t="11064" r="5813" b="11208">what</wd>

<space/>

</ln>

<ln l="1445" t="11304" r="5803" b="11491" baseLine="11438" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="11304" r="1944" b="11448">makes</wd>

<space/>

<wd l="2002" t="11304" r="2285" b="11448">this</wd>

<space/>

<wd l="2342" t="11347" r="2779" b="11491">genre</wd>

<space/>

<wd l="2832" t="11304" r="3173" b="11472">tick,</wd>

<space/>

<wd l="3240" t="11304" r="3523" b="11448">and</wd>

<space/>

<wd l="3571" t="11304" r="3907" b="11448">how</wd>

<space/>

<wd l="3965" t="11347" r="4229" b="11448">our</wd>

<space/>

<wd l="4282" t="11304" r="4901" b="11491">existing</wd>

<space/>

<wd l="4958" t="11304" r="5803" b="11486">techniques</wd>

<space/>

</ln>

<ln l="1450" t="11539" r="5813" b="11726" baseLine="11678" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1450" t="11539" r="1733" b="11683">and</wd>

<space/>

<wd l="1810" t="11582" r="2558" b="11683">resources</wd>

<space/>

<wd l="2650" t="11582" r="2918" b="11683">can</wd>

<space/>

<wd l="3000" t="11539" r="3187" b="11683">be</wd>

<space/>

<wd l="3274" t="11539" r="4176" b="11726">generalised</wd>

<space/>

<wd l="4258" t="11539" r="4714" b="11683">better</wd>

<space/>

<wd l="4790" t="11558" r="4944" b="11683">to</wd>

<space/>

<wd l="5030" t="11539" r="5194" b="11683">fit</wd>

<space/>

<wd l="5285" t="11539" r="5640" b="11683">such</wd>

<space/>

<wd l="5726" t="11582" r="5813" b="11683">a</wd>

<space/>

</ln>

<ln l="1450" t="11779" r="3326" b="11966" baseLine="11918" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1450" t="11779" r="2371" b="11966">challenging</wd>

<space/>

<wd l="2424" t="11798" r="2722" b="11923">text</wd>

<space/>

<wd l="2779" t="11822" r="3326" b="11923">source.</wd>

</ln>

</para>

<para l="1445" t="12058" r="5813" b="15355" alignment="justified" spaceBefore="24" fli="216" lsp="exactly" lspExact="239" language="en">

<ln l="1642" t="12058" r="5808" b="12245" baseLine="12192" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1642" t="12058" r="1992" b="12202">This</wd>

<space/>

<wd l="2074" t="12101" r="2515" b="12240">paper</wd>

<space/>

<wd l="2587" t="12058" r="2851" b="12202">has</wd>

<space/>

<wd l="2928" t="12058" r="3552" b="12202">focused</wd>

<space/>

<wd l="3629" t="12101" r="3826" b="12202">on</wd>

<space/>

<wd l="3898" t="12058" r="4814" b="12245">introducing</wd>

<space/>

<wd l="4896" t="12101" r="5160" b="12202">our</wd>

<space/>

<wd l="5232" t="12058" r="5808" b="12202">Named</wd>

<space/>

</ln>

<ln l="1445" t="12298" r="5798" b="12485" baseLine="12432" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="12298" r="1925" b="12485">Entity</wd>

<space/>

<wd l="2011" t="12298" r="2990" b="12485">Recognition</wd>

<space/>

<wd l="3077" t="12298" r="3590" b="12475">(NER)</wd>

<space/>

<wd l="3686" t="12317" r="4090" b="12485">entry</wd>

<space/>

<wd l="4176" t="12317" r="4325" b="12442">to</wd>

<space/>

<wd l="4411" t="12298" r="4656" b="12442">the</wd>

<space/>

<wd l="4738" t="12302" r="5338" b="12442">WNUT</wd>

<space/>

<wd l="5424" t="12298" r="5798" b="12442">eval-</wd>

</ln>

<ln l="1445" t="12538" r="5803" b="12725" baseLine="12672" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="12538" r="1944" b="12682">uation</wd>

<space/>

<wd l="2016" t="12538" r="2770" b="12725">challenge</wd>

<space/>

<wd l="2851" t="12538" r="3586" b="12715">(Baldwin</wd>

<space/>

<wd l="3658" t="12557" r="3797" b="12682">et</wd>

<space/>

<wd l="3869" t="12538" r="4099" b="12706">al.,</wd>

<space/>

<wd l="4186" t="12538" r="4690" b="12715">2015),</wd>

<space/>

<wd l="4771" t="12538" r="5256" b="12682">which</wd>

<space/>

<wd l="5323" t="12538" r="5803" b="12682">builds</wd>

<space/>

</ln>

<ln l="1450" t="12773" r="5808" b="12955" baseLine="12912" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="12816" r="1642" b="12917">on</wd>

<space/>

<wd l="1699" t="12816" r="1958" b="12917">our</wd>

<space/>

<wd l="2016" t="12773" r="2520" b="12917">earlier</wd>

<space/>

<wd l="2573" t="12773" r="3533" b="12955">experiments</wd>

<space/>

<wd l="3590" t="12773" r="3946" b="12917">with</wd>

<space/>

<wd l="3994" t="12773" r="4565" b="12917">Twitter</wd>

<space/>

<wd l="4622" t="12773" r="4906" b="12917">and</wd>

<space/>

<wd l="4954" t="12816" r="5352" b="12917">news</wd>

<space/>

<wd l="5410" t="12778" r="5808" b="12917">NER</wd>

<space/>

</ln>

<ln l="1454" t="13013" r="5803" b="13200" baseLine="13152" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1454" t="13013" r="2414" b="13200">(Derczynski</wd>

<space/>

<wd l="2506" t="13013" r="2789" b="13157">and</wd>

<space/>

<wd l="2870" t="13013" r="3758" b="13181">Bontcheva,</wd>

<space/>

<wd l="3859" t="13013" r="4301" b="13181">2014;</wd>

<space/>

<wd l="4411" t="13013" r="5256" b="13157">Bontcheva</wd>

<space/>

<wd l="5342" t="13032" r="5482" b="13157">et</wd>

<space/>

<wd l="5568" t="13013" r="5803" b="13181">al.,</wd>

<space/>

</ln>

<ln l="1450" t="13253" r="5808" b="13440" baseLine="13387" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="13253" r="1891" b="13421">2013;</wd>

<space/>

<wd l="1949" t="13253" r="2981" b="13440">Cunningham</wd>

<space/>

<wd l="3029" t="13272" r="3168" b="13397">et</wd>

<space/>

<wd l="3221" t="13253" r="3451" b="13421">al.,</wd>

<space/>

<wd l="3514" t="13253" r="4013" b="13430">2002).</wd>

<space/>

<wd l="4085" t="13258" r="4248" b="13392">In</wd>

<space/>

<wd l="4296" t="13253" r="5098" b="13435">particular,</wd>

<space/>

<wd l="5150" t="13296" r="5381" b="13397">we</wd>

<space/>

<wd l="5434" t="13253" r="5808" b="13435">push</wd>

<space/>

</ln>

<ln l="1450" t="13493" r="5803" b="13680" baseLine="13627" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="13493" r="1776" b="13637">data</wd>

<space/>

<wd l="1858" t="13536" r="2438" b="13637">sources</wd>

<space/>

<wd l="2520" t="13493" r="2803" b="13637">and</wd>

<space/>

<wd l="2875" t="13493" r="4123" b="13675">representations,</wd>

<space/>

<wd l="4205" t="13493" r="4632" b="13680">using</wd>

<space/>

<wd l="4709" t="13493" r="5098" b="13637">what</wd>

<space/>

<wd l="5165" t="13493" r="5294" b="13637">is</wd>

<space/>

<wd l="5371" t="13493" r="5803" b="13637">know</wd>

<space/>

</ln>

<ln l="1450" t="13733" r="5803" b="13877" baseLine="13867" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="13733" r="1886" b="13877">about</wd>

<space/>

<wd l="1949" t="13733" r="2520" b="13877">Twitter</wd>

<space/>

<wd l="2592" t="13776" r="2755" b="13877">so</wd>

<space/>

<wd l="2822" t="13733" r="3043" b="13877">far</wd>

<space/>

<wd l="3106" t="13752" r="3254" b="13877">to</wd>

<space/>

<wd l="3326" t="13752" r="4056" b="13877">construct</wd>

<space/>

<wd l="4118" t="13776" r="4205" b="13877">a</wd>

<space/>

<wd l="4267" t="13733" r="4762" b="13877">model</wd>

<space/>

<wd l="4829" t="13733" r="5126" b="13877">that</wd>

<space/>

<wd l="5189" t="13733" r="5803" b="13877">informs</wd>

<space/>

</ln>

<ln l="1450" t="13973" r="5808" b="14160" baseLine="14107" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="14016" r="1709" b="14117">our</wd>

<space/>

<wd l="1757" t="13973" r="2386" b="14117">choices.</wd>

<space/>

<wd l="2462" t="13973" r="3427" b="14160">Specifically,</wd>

<space/>

<wd l="3480" t="14016" r="3710" b="14117">we</wd>

<space/>

<wd l="3758" t="13992" r="4354" b="14155">attempt</wd>

<space/>

<wd l="4397" t="13992" r="4546" b="14117">to</wd>

<space/>

<wd l="4598" t="13992" r="5530" b="14155">compensate</wd>

<space/>

<wd l="5578" t="13973" r="5808" b="14117">for</wd>

<space/>

</ln>

<ln l="1450" t="14208" r="5808" b="14395" baseLine="14347" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="14208" r="1891" b="14395">entity</wd>

<space/>

<wd l="1954" t="14208" r="2338" b="14376">drift;</wd>

<space/>

<wd l="2400" t="14227" r="2549" b="14352">to</wd>

<space/>

<wd l="2606" t="14208" r="3197" b="14352">harness</wd>

<space/>

<wd l="3254" t="14208" r="4306" b="14390">unsupervised</wd>

<space/>

<wd l="4358" t="14208" r="4766" b="14352">word</wd>

<space/>

<wd l="4819" t="14208" r="5597" b="14395">clustering</wd>

<space/>

<wd l="5654" t="14208" r="5808" b="14347">in</wd>

<space/>

</ln>

<ln l="1450" t="14448" r="5803" b="14635" baseLine="14587" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="14491" r="1536" b="14592">a</wd>

<space/>

<wd l="1584" t="14448" r="2395" b="14630">principled</wd>

<space/>

<wd l="2448" t="14448" r="3082" b="14616">fashion;</wd>

<space/>

<wd l="3149" t="14467" r="3298" b="14592">to</wd>

<space/>

<wd l="3355" t="14448" r="3773" b="14635">bring</wd>

<space/>

<wd l="3830" t="14448" r="3989" b="14587">in</wd>

<space/>

<wd l="4042" t="14448" r="4901" b="14635">large-scale</wd>

<space/>

<wd l="4963" t="14467" r="5803" b="14635">gazetteers;</wd>

<space/>

</ln>

<ln l="1445" t="14688" r="5813" b="14870" baseLine="14822" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="14707" r="1594" b="14832">to</wd>

<space/>

<wd l="1680" t="14707" r="2390" b="14832">attenuate</wd>

<space/>

<wd l="2472" t="14688" r="2712" b="14832">the</wd>

<space/>

<wd l="2794" t="14688" r="3336" b="14870">impact</wd>

<space/>

<wd l="3418" t="14688" r="3586" b="14832">of</wd>

<space/>

<wd l="3653" t="14707" r="4090" b="14832">terms</wd>

<space/>

<wd l="4176" t="14688" r="4838" b="14870">frequent</wd>

<space/>

<wd l="4915" t="14688" r="5074" b="14827">in</wd>

<space/>

<wd l="5146" t="14688" r="5429" b="14832">this</wd>

<space/>

<wd l="5510" t="14707" r="5813" b="14832">text</wd>

<space/>

</ln>

<ln l="1445" t="14928" r="5808" b="15115" baseLine="15062" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="14947" r="1834" b="15115">type;</wd>

<space/>

<wd l="1944" t="14928" r="2227" b="15072">and</wd>

<space/>

<wd l="2309" t="14947" r="2462" b="15072">to</wd>

<space/>

<wd l="2549" t="14928" r="2890" b="15110">pick</wd>

<space/>

<wd l="2976" t="14928" r="3259" b="15072">and</wd>

<space/>

<wd l="3346" t="14928" r="3893" b="15072">choose</wd>

<space/>

<wd l="3979" t="14928" r="4618" b="15115">targeted</wd>

<space/>

<wd l="4704" t="14947" r="5486" b="15115">gazetteers</wd>

<space/>

<wd l="5578" t="14928" r="5808" b="15072">for</wd>

<space/>

</ln>

<ln l="1454" t="15168" r="3062" b="15355" baseLine="15302" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1454" t="15168" r="2050" b="15350">specific</wd>

<space/>

<wd l="2107" t="15168" r="2554" b="15355">entity</wd>

<space/>

<wd l="2606" t="15187" r="3062" b="15355">types.</wd>

</ln>

</para>

</column>

<column l="6149" t="4120" r="10531" b="15363">

<para l="6149" t="4166" r="7363" b="4339" alignment="left" spaceBefore="3" lsp="exactly" lspExact="279" language="en">

<ln l="6149" t="4166" r="7363" b="4339" baseLine="4334" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="15">

<wd l="6149" t="4166" r="6264" b="4334">2</wd>

<space/>

<wd l="6509" t="4171" r="7363" b="4339">Datasets</wd>

</ln>

</para>

<para l="6149" t="4675" r="10526" b="6768" alignment="justified" spaceBefore="216" lsp="exactly" lspExact="240" language="en">

<ln l="6149" t="4675" r="10512" b="4862" baseLine="4810" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6149" t="4675" r="6456" b="4819">The</wd>

<space/>

<wd l="6533" t="4675" r="7147" b="4862">training</wd>

<space/>

<wd l="7234" t="4675" r="7517" b="4819">and</wd>

<space/>

<wd l="7594" t="4675" r="8616" b="4858">development</wd>

<space/>

<wd l="8698" t="4694" r="8981" b="4819">sets</wd>

<space/>

<wd l="9062" t="4675" r="9768" b="4858">provided</wd>

<space/>

<wd l="9840" t="4675" r="10195" b="4819">with</wd>

<space/>

<wd l="10272" t="4675" r="10512" b="4819">the</wd>

<space/>

</ln>

<ln l="6154" t="4915" r="10507" b="5102" baseLine="5050" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6154" t="4915" r="6912" b="5102">challenge</wd>

<space/>

<wd l="6960" t="4958" r="7349" b="5059">were</wd>

<space/>

<wd l="7402" t="4915" r="7896" b="5059">drawn</wd>

<space/>

<wd l="7944" t="4915" r="8333" b="5059">from</wd>

<space/>

<wd l="8381" t="4915" r="8621" b="5059">the</wd>

<space/>

<wd l="8674" t="4915" r="9130" b="5059">Ritter</wd>

<space/>

<wd l="9182" t="4934" r="9326" b="5059">et</wd>

<space/>

<wd l="9379" t="4915" r="9552" b="5059">al.</wd>

<space/>

<wd l="9624" t="4915" r="10138" b="5093">(2011)</wd>

<space/>

<wd l="10200" t="4958" r="10507" b="5059">cor-</wd>

</ln>

<ln l="6149" t="5150" r="10507" b="5333" baseLine="5290" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6149" t="5194" r="6466" b="5333">pus.</wd>

<space/>

<wd l="6629" t="5150" r="6974" b="5294">This</wd>

<space/>

<wd l="7061" t="5194" r="7363" b="5294">was</wd>

<space/>

<wd l="7454" t="5194" r="7541" b="5294">a</wd>

<space/>

<wd l="7627" t="5170" r="7838" b="5294">set</wd>

<space/>

<wd l="7925" t="5150" r="8093" b="5294">of</wd>

<space/>

<wd l="8170" t="5150" r="8558" b="5294">2394</wd>

<space/>

<wd l="8640" t="5170" r="9144" b="5294">tweets</wd>

<space/>

<wd l="9230" t="5150" r="9619" b="5294">from</wd>

<space/>

<wd l="9696" t="5150" r="9984" b="5294">late</wd>

<space/>

<wd l="10070" t="5150" r="10507" b="5318">2010,</wd>

<space/>

</ln>

<ln l="6154" t="5390" r="10507" b="5578" baseLine="5530" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6154" t="5390" r="6926" b="5534">annotated</wd>

<space/>

<wd l="6994" t="5390" r="7349" b="5534">with</wd>

<space/>

<wd l="7416" t="5410" r="7661" b="5534">ten</wd>

<space/>

<wd l="7733" t="5390" r="8179" b="5578">entity</wd>

<space/>

<wd l="8251" t="5410" r="8712" b="5578">types,</wd>

<space/>

<wd l="8794" t="5390" r="9542" b="5578">including</wd>

<space/>

<wd l="9614" t="5390" r="9854" b="5534">the</wd>

<space/>

<wd l="9931" t="5390" r="10507" b="5534">“other”</wd>

<space/>

</ln>

<ln l="6149" t="5630" r="10526" b="5818" baseLine="5765" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6149" t="5650" r="6528" b="5818">type.</wd>

<space/>

<wd l="6706" t="5635" r="6850" b="5770">A</wd>

<space/>

<wd l="6931" t="5630" r="7286" b="5774">later</wd>

<space/>

<wd l="7368" t="5630" r="7920" b="5774">release</wd>

<space/>

<wd l="8006" t="5630" r="8160" b="5770">in</wd>

<space/>

<wd l="8246" t="5630" r="8486" b="5774">the</wd>

<space/>

<wd l="8578" t="5630" r="9331" b="5818">challenge</wd>

<space/>

<wd l="9422" t="5674" r="9787" b="5818">gave</wd>

<space/>

<wd l="9874" t="5674" r="9960" b="5774">a</wd>

<space/>

<wd l="10051" t="5650" r="10267" b="5774">set</wd>

<space/>

<wd l="10354" t="5630" r="10526" b="5774">of</wd>

<space/>

</ln>

<ln l="6149" t="5870" r="10512" b="6058" baseLine="6005" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6149" t="5870" r="6442" b="6014">420</wd>

<space/>

<wd l="6547" t="5890" r="7051" b="6014">tweets</wd>

<space/>

<wd l="7157" t="5870" r="7546" b="6014">from</wd>

<space/>

<wd l="7646" t="5870" r="8083" b="6038">2015,</wd>

<space/>

<wd l="8208" t="5870" r="8981" b="6014">annotated</wd>

<space/>

<wd l="9077" t="5870" r="9235" b="6010">in</wd>

<space/>

<wd l="9331" t="5870" r="9571" b="6014">the</wd>

<space/>

<wd l="9686" t="5914" r="10080" b="6014">same</wd>

<space/>

<wd l="10186" t="5914" r="10512" b="6058">way</wd>

<space/>

</ln>

<ln l="6158" t="6110" r="10507" b="6293" baseLine="6245">

<wd l="6158" t="6110" r="6485" b="6288"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-1">dev</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-1"><space/>

</run>

<wd l="6562" t="6110" r="7066" b="6288"><run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-1">2015</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">).</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="7133" t="6115" r="7349" b="6254">As</wd>

<space/>

<wd l="7397" t="6154" r="7594" b="6254">no</wd>

<space/>

<wd l="7646" t="6110" r="8050" b="6254">other</wd>

<space/>

<wd l="8093" t="6130" r="8530" b="6254">tweet</wd>

<space/>

<wd l="8573" t="6154" r="9182" b="6293">corpora</wd>

<space/>

<wd l="9221" t="6154" r="9485" b="6254">use</wd>

<space/>

<wd l="9528" t="6110" r="9811" b="6254">this</wd>

<space/>

<wd l="9878" t="6110" r="10507" b="6254">10-class</wd>

<space/>

</run>

</ln>

<ln l="6154" t="6346" r="10517" b="6533" baseLine="6485" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6154" t="6346" r="6600" b="6533">entity</wd>

<space/>

<wd l="6648" t="6346" r="7190" b="6514">model,</wd>

<space/>

<wd l="7243" t="6389" r="7474" b="6490">we</wd>

<space/>

<wd l="7531" t="6346" r="7939" b="6490">stuck</wd>

<space/>

<wd l="7987" t="6346" r="8342" b="6490">with</wd>

<space/>

<wd l="8386" t="6346" r="8669" b="6490">this</wd>

<space/>

<wd l="8722" t="6346" r="9053" b="6490">data</wd>

<space/>

<wd l="9096" t="6346" r="9326" b="6490">for</wd>

<space/>

<wd l="9374" t="6346" r="9614" b="6490">the</wd>

<space/>

<wd l="9672" t="6346" r="10517" b="6528">supervised</wd>

<space/>

</ln>

<ln l="6149" t="6586" r="7886" b="6768" baseLine="6725" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6149" t="6610" r="6528" b="6768">parts</wd>

<space/>

<wd l="6590" t="6586" r="6763" b="6730">of</wd>

<space/>

<wd l="6806" t="6629" r="7066" b="6730">our</wd>

<space/>

<wd l="7123" t="6586" r="7886" b="6768">approach.</wd>

</ln>

</para>

<para l="6149" t="6869" r="10517" b="8726" alignment="justified" spaceBefore="33" fli="144" lsp="exactly" lspExact="240" language="en">

<ln l="6346" t="6869" r="10512" b="7056" baseLine="7008" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6346" t="6874" r="6624" b="7013">For</wd>

<space/>

<wd l="6739" t="6869" r="7454" b="7056">language</wd>

<space/>

<wd l="7574" t="6869" r="8424" b="7056">modelling,</wd>

<space/>

<wd l="8563" t="6912" r="8794" b="7013">we</wd>

<space/>

<wd l="8909" t="6869" r="9278" b="7013">used</wd>

<space/>

<wd l="9398" t="6912" r="9485" b="7013">a</wd>

<space/>

<wd l="9605" t="6888" r="9821" b="7013">set</wd>

<space/>

<wd l="9936" t="6869" r="10109" b="7013">of</wd>

<space/>

<wd l="10219" t="6869" r="10512" b="7013">250</wd>

<space/>

</ln>

<ln l="6149" t="7109" r="10507" b="7296" baseLine="7243" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="7109" r="6725" b="7253">million</wd>

<space/>

<wd l="6816" t="7128" r="7315" b="7253">tweets</wd>

<space/>

<wd l="7416" t="7109" r="7910" b="7253">drawn</wd>

<space/>

<wd l="7997" t="7109" r="8386" b="7253">from</wd>

<space/>

<wd l="8472" t="7109" r="8717" b="7253">the</wd>

<space/>

<wd l="8808" t="7109" r="9379" b="7253">Twitter</wd>

<space/>

<wd l="9470" t="7109" r="10008" b="7296">garden</wd>

<space/>

<wd l="10099" t="7109" r="10507" b="7277">hose,</wd>

<space/>

</ln>

<ln l="6149" t="7349" r="10507" b="7536" baseLine="7483" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="7349" r="6638" b="7493">which</wd>

<space/>

<wd l="6696" t="7349" r="6826" b="7493">is</wd>

<space/>

<wd l="6898" t="7392" r="6984" b="7493">a</wd>

<space/>

<wd l="7042" t="7349" r="7315" b="7493">fair</wd>

<space/>

<wd l="7397" t="7349" r="7738" b="7493">10%</wd>

<space/>

<wd l="7814" t="7349" r="8366" b="7531">sample</wd>

<space/>

<wd l="8438" t="7349" r="8606" b="7493">of</wd>

<space/>

<wd l="8664" t="7349" r="8856" b="7493">all</wd>

<space/>

<wd l="8918" t="7368" r="9422" b="7493">tweets</wd>

<space/>

<wd l="9499" t="7349" r="9998" b="7536">(Kergl</wd>

<space/>

<wd l="10070" t="7368" r="10210" b="7493">et</wd>

<space/>

<wd l="10277" t="7349" r="10507" b="7517">al.,</wd>

<space/>

</ln>

<ln l="6154" t="7589" r="10512" b="7776" baseLine="7723" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6154" t="7589" r="6653" b="7766">2014).</wd>

<space/>

<wd l="6725" t="7589" r="7195" b="7733">These</wd>

<space/>

<wd l="7243" t="7632" r="7627" b="7733">were</wd>

<space/>

<wd l="7670" t="7589" r="8304" b="7733">reduced</wd>

<space/>

<wd l="8347" t="7608" r="8496" b="7733">to</wd>

<space/>

<wd l="8530" t="7589" r="8837" b="7776">just</wd>

<space/>

<wd l="8875" t="7589" r="9485" b="7776">English</wd>

<space/>

<wd l="9528" t="7608" r="10032" b="7733">tweets</wd>

<space/>

<wd l="10080" t="7589" r="10512" b="7776">using</wd>

<space/>

</ln>

<ln l="6149" t="7829" r="10517" b="8016" baseLine="7963" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="7829" r="6888" b="8016">langid.py</wd>

<space/>

<wd l="6950" t="7829" r="7282" b="8006">(Lui</wd>

<space/>

<wd l="7339" t="7829" r="7622" b="7973">and</wd>

<space/>

<wd l="7670" t="7829" r="8390" b="7997">Baldwin,</wd>

<space/>

<wd l="8453" t="7829" r="8957" b="8006">2012),</wd>

<space/>

<wd l="9014" t="7829" r="9302" b="7973">and</wd>

<space/>

<wd l="9350" t="7829" r="9691" b="7973">then</wd>

<space/>

<wd l="9739" t="7829" r="10517" b="7973">tokenized</wd>

<space/>

</ln>

<ln l="6149" t="8064" r="10517" b="8251" baseLine="8203" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="8064" r="6576" b="8251">using</wd>

<space/>

<wd l="6634" t="8064" r="6878" b="8208">the</wd>

<space/>

<wd l="6936" t="8064" r="7814" b="8208">twokenizer</wd>

<space/>

<wd l="7872" t="8064" r="8179" b="8208">tool</wd>

<space/>

<wd l="8246" t="8064" r="8899" b="8242">(Connor</wd>

<space/>

<wd l="8962" t="8083" r="9101" b="8208">et</wd>

<space/>

<wd l="9158" t="8064" r="9394" b="8232">al.,</wd>

<space/>

<wd l="9461" t="8064" r="9965" b="8242">2010),</wd>

<space/>

<wd l="10027" t="8064" r="10517" b="8208">which</wd>

<space/>

</ln>

<ln l="6154" t="8304" r="10517" b="8491" baseLine="8443" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6154" t="8304" r="6557" b="8491">gives</wd>

<space/>

<wd l="6638" t="8304" r="6878" b="8448">the</wd>

<space/>

<wd l="6970" t="8347" r="7363" b="8448">same</wd>

<space/>

<wd l="7445" t="8304" r="8429" b="8448">tokenization</wd>

<space/>

<wd l="8510" t="8347" r="8664" b="8448">as</wd>

<space/>

<wd l="8746" t="8304" r="9110" b="8448">used</wd>

<space/>

<wd l="9187" t="8304" r="9346" b="8443">in</wd>

<space/>

<wd l="9418" t="8304" r="9662" b="8448">the</wd>

<space/>

<wd l="9739" t="8304" r="10152" b="8486">input</wd>

<space/>

<wd l="10229" t="8304" r="10517" b="8448">and</wd>

<space/>

</ln>

<ln l="6154" t="8544" r="7666" b="8726" baseLine="8678" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6154" t="8544" r="6970" b="8688">evaluation</wd>

<space/>

<wd l="7022" t="8587" r="7666" b="8726">corpora.</wd>

</ln>

</para>

<para l="6149" t="8827" r="10517" b="10680" alignment="justified" spaceBefore="39" fli="144" lsp="exactly" lspExact="240" language="en">

<ln l="6346" t="8827" r="10512" b="9014" baseLine="8962" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6346" t="8832" r="6514" b="8966">In</wd>

<space/>

<wd l="6566" t="8827" r="7258" b="8995">addition,</wd>

<space/>

<wd l="7310" t="8870" r="7541" b="8971">we</wd>

<space/>

<wd l="7589" t="8827" r="7954" b="8971">used</wd>

<space/>

<wd l="7997" t="8827" r="8395" b="8971">three</wd>

<space/>

<wd l="8453" t="8870" r="9034" b="8971">sources</wd>

<space/>

<wd l="9091" t="8827" r="9264" b="8971">of</wd>

<space/>

<wd l="9302" t="8846" r="10133" b="9014">gazetteers.</wd>

<space/>

<wd l="10205" t="8827" r="10512" b="8971">The</wd>

<space/>

</ln>

<ln l="6154" t="9067" r="10517" b="9254" baseLine="9202" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6154" t="9067" r="6461" b="9211">first</wd>

<space/>

<wd l="6538" t="9086" r="6830" b="9211">two</wd>

<space/>

<wd l="6912" t="9110" r="7301" b="9211">were</wd>

<space/>

<wd l="7378" t="9067" r="8117" b="9254">manually</wd>

<space/>

<wd l="8203" t="9067" r="8818" b="9235">created,</wd>

<space/>

<wd l="8918" t="9067" r="9202" b="9211">and</wd>

<space/>

<wd l="9283" t="9067" r="9902" b="9211">covered</wd>

<space/>

<wd l="9984" t="9067" r="10517" b="9211">named</wd>

<space/>

</ln>

<ln l="6149" t="9307" r="10517" b="9490" baseLine="9442" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="9307" r="6854" b="9490">temporal</wd>

<space/>

<wd l="6946" t="9307" r="7862" b="9490">expressions</wd>

<space/>

<wd l="7958" t="9307" r="8640" b="9485">(Brucato</wd>

<space/>

<wd l="8731" t="9326" r="8875" b="9451">et</wd>

<space/>

<wd l="8957" t="9307" r="9192" b="9475">al.,</wd>

<space/>

<wd l="9293" t="9307" r="9744" b="9485">2013)</wd>

<space/>

<wd l="9840" t="9307" r="10123" b="9451">and</wd>

<space/>

<wd l="10210" t="9307" r="10517" b="9451">first</wd>

<space/>

</ln>

<ln l="6149" t="9542" r="10517" b="9730" baseLine="9682" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="9586" r="6682" b="9725">person</wd>

<space/>

<wd l="6773" t="9586" r="7272" b="9686">names</wd>

<space/>

<wd l="7378" t="9542" r="8472" b="9730">(Cunningham</wd>

<space/>

<wd l="8563" t="9562" r="8702" b="9686">et</wd>

<space/>

<wd l="8798" t="9542" r="9029" b="9710">al.,</wd>

<space/>

<wd l="9139" t="9542" r="9638" b="9720">2002).</wd>

<space/>

<wd l="9835" t="9542" r="10142" b="9686">The</wd>

<space/>

<wd l="10238" t="9542" r="10517" b="9686">last</wd>

<space/>

</ln>

<ln l="6154" t="9782" r="10517" b="9970" baseLine="9922" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6154" t="9782" r="6979" b="9965">comprised</wd>

<space/>

<wd l="7027" t="9826" r="7435" b="9926">more</wd>

<space/>

<wd l="7483" t="9802" r="7973" b="9926">recent</wd>

<space/>

<wd l="8026" t="9782" r="8395" b="9950">data,</wd>

<space/>

<wd l="8453" t="9782" r="8947" b="9926">drawn</wd>

<space/>

<wd l="9000" t="9782" r="10075" b="9970">automatically</wd>

<space/>

<wd l="10128" t="9782" r="10517" b="9926">from</wd>

<space/>

</ln>

<ln l="6149" t="10022" r="10512" b="10205" baseLine="10157" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="10022" r="6854" b="10166">Freebase</wd>

<space/>

<wd l="6936" t="10066" r="7090" b="10166">as</wd>

<space/>

<wd l="7171" t="10046" r="7483" b="10205">part</wd>

<space/>

<wd l="7560" t="10022" r="7733" b="10166">of</wd>

<space/>

<wd l="7800" t="10066" r="7886" b="10166">a</wd>

<space/>

<wd l="7963" t="10022" r="8491" b="10166">distant</wd>

<space/>

<wd l="8573" t="10022" r="9480" b="10205">supervision</wd>

<space/>

<wd l="9562" t="10022" r="10286" b="10205">approach</wd>

<space/>

<wd l="10358" t="10042" r="10512" b="10166">to</wd>

<space/>

</ln>

<ln l="6154" t="10262" r="10517" b="10450" baseLine="10397" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6154" t="10262" r="6600" b="10450">entity</wd>

<space/>

<wd l="6662" t="10262" r="7387" b="10406">detection</wd>

<space/>

<wd l="7445" t="10262" r="7728" b="10406">and</wd>

<space/>

<wd l="7786" t="10262" r="8395" b="10406">relation</wd>

<space/>

<wd l="8453" t="10262" r="9288" b="10406">annotation</wd>

<space/>

<wd l="9350" t="10262" r="10315" b="10450">(Augenstein</wd>

<space/>

<wd l="10373" t="10282" r="10517" b="10406">et</wd>

<space/>

</ln>

<ln l="6154" t="10502" r="6946" b="10680" baseLine="10637" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6154" t="10502" r="6384" b="10670">al.,</wd>

<space/>

<wd l="6446" t="10502" r="6946" b="10680">2014).</wd>

</ln>

</para>

<para l="6149" t="11074" r="7301" b="11246" alignment="left" spaceBefore="336" lsp="exactly" lspExact="279" language="en">

<ln l="6149" t="11074" r="7301" b="11246" baseLine="11237" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="19">

<wd l="6149" t="11074" r="6264" b="11246">3</wd>

<space/>

<wd l="6509" t="11078" r="7301" b="11246">Method</wd>

</ln>

</para>

<para l="6149" t="11578" r="10526" b="15355" alignment="justified" spaceBefore="210" lsp="exactly" lspExact="239" language="en">

<ln l="6149" t="11578" r="10507" b="11760" baseLine="11717" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="11578" r="6456" b="11722">The</wd>

<space/>

<wd l="6528" t="11582" r="7128" b="11722">WNUT</wd>

<space/>

<wd l="7200" t="11578" r="7771" b="11722">Twitter</wd>

<space/>

<wd l="7843" t="11582" r="8242" b="11722">NER</wd>

<space/>

<wd l="8314" t="11578" r="8635" b="11722">task</wd>

<space/>

<wd l="8702" t="11578" r="9370" b="11760">required</wd>

<space/>

<wd l="9442" t="11621" r="9614" b="11722">us</wd>

<space/>

<wd l="9691" t="11597" r="9840" b="11722">to</wd>

<space/>

<wd l="9922" t="11578" r="10507" b="11722">address</wd>

<space/>

</ln>

<ln l="6149" t="11818" r="10507" b="12005" baseLine="11957" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="11861" r="6586" b="12005">many</wd>

<space/>

<wd l="6648" t="11818" r="6979" b="11962">data</wd>

<space/>

<wd l="7046" t="11818" r="7651" b="12005">sparsity</wd>

<space/>

<wd l="7718" t="11818" r="8592" b="12005">challenges.</wd>

<space/>

<wd l="8688" t="11818" r="9240" b="12005">Firstly,</wd>

<space/>

<wd l="9302" t="11818" r="9547" b="11962">the</wd>

<space/>

<wd l="9610" t="11818" r="10229" b="11962">datasets</wd>

<space/>

<wd l="10291" t="11818" r="10507" b="11957">in-</wd>

</ln>

<ln l="6149" t="12058" r="10507" b="12245" baseLine="12192" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="12058" r="6686" b="12202">volved</wd>

<space/>

<wd l="6758" t="12101" r="6994" b="12202">are</wd>

<space/>

<wd l="7080" t="12058" r="7608" b="12245">simply</wd>

<space/>

<wd l="7680" t="12101" r="8030" b="12245">very</wd>

<space/>

<wd l="8112" t="12058" r="8578" b="12226">small,</wd>

<space/>

<wd l="8659" t="12058" r="9254" b="12245">making</wd>

<space/>

<wd l="9326" t="12058" r="9442" b="12202">it</wd>

<space/>

<wd l="9509" t="12058" r="9864" b="12202">hard</wd>

<space/>

<wd l="9936" t="12077" r="10085" b="12202">to</wd>

<space/>

<wd l="10166" t="12101" r="10507" b="12245">gen-</wd>

</ln>

<ln l="6154" t="12298" r="10517" b="12485" baseLine="12432" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6154" t="12298" r="6667" b="12442">eralise</wd>

<space/>

<wd l="6725" t="12298" r="6883" b="12437">in</wd>

<space/>

<wd l="6950" t="12298" r="7795" b="12480">supervised</wd>

<space/>

<wd l="7853" t="12298" r="8549" b="12485">learning,</wd>

<space/>

<wd l="8621" t="12298" r="8904" b="12442">and</wd>

<space/>

<wd l="8962" t="12298" r="9643" b="12485">meaning</wd>

<space/>

<wd l="9706" t="12298" r="10008" b="12442">that</wd>

<space/>

<wd l="10070" t="12298" r="10517" b="12442">effect</wd>

<space/>

</ln>

<ln l="6158" t="12538" r="10517" b="12725" baseLine="12672" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6158" t="12538" r="6528" b="12682">sizes</wd>

<space/>

<wd l="6610" t="12557" r="7142" b="12682">cannot</wd>

<space/>

<wd l="7210" t="12538" r="7397" b="12682">be</wd>

<space/>

<wd l="7469" t="12538" r="8074" b="12725">reliably</wd>

<space/>

<wd l="8150" t="12538" r="8952" b="12682">measured.</wd>

<space/>

<wd l="9091" t="12538" r="9859" b="12725">Secondly,</wd>

<space/>

<wd l="9941" t="12538" r="10517" b="12682">Twitter</wd>

<space/>

</ln>

<ln l="6149" t="12773" r="10507" b="12960" baseLine="12912" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="12773" r="6864" b="12960">language</wd>

<space/>

<wd l="6941" t="12773" r="7066" b="12917">is</wd>

<space/>

<wd l="7147" t="12773" r="7834" b="12960">arguably</wd>

<space/>

<wd l="7915" t="12816" r="8194" b="12917">one</wd>

<space/>

<wd l="8275" t="12773" r="8443" b="12917">of</wd>

<space/>

<wd l="8506" t="12773" r="8746" b="12917">the</wd>

<space/>

<wd l="8822" t="12773" r="9432" b="12917">noisiest</wd>

<space/>

<wd l="9504" t="12773" r="9792" b="12917">and</wd>

<space/>

<wd l="9859" t="12773" r="10507" b="12960">idiosyn-</wd>

</ln>

<ln l="6154" t="13013" r="10526" b="13200" baseLine="13152" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6154" t="13013" r="6590" b="13157">cratic</wd>

<space/>

<wd l="6634" t="13032" r="6931" b="13157">text</wd>

<space/>

<wd l="6974" t="13056" r="7536" b="13200">genres,</wd>

<space/>

<wd l="7584" t="13013" r="8074" b="13157">which</wd>

<space/>

<wd l="8112" t="13013" r="8870" b="13157">manifests</wd>

<space/>

<wd l="8923" t="13056" r="9082" b="13157">as</wd>

<space/>

<wd l="9134" t="13056" r="9221" b="13157">a</wd>

<space/>

<wd l="9259" t="13013" r="9653" b="13200">large</wd>

<space/>

<wd l="9696" t="13013" r="10306" b="13157">number</wd>

<space/>

<wd l="10354" t="13013" r="10526" b="13157">of</wd>

<space/>

</ln>

<ln l="6149" t="13253" r="10512" b="13440" baseLine="13387" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="13253" r="6557" b="13397">word</wd>

<space/>

<wd l="6614" t="13272" r="7080" b="13440">types,</wd>

<space/>

<wd l="7152" t="13253" r="7440" b="13397">and</wd>

<space/>

<wd l="7498" t="13296" r="7843" b="13440">very</wd>

<space/>

<wd l="7910" t="13253" r="8299" b="13440">large</wd>

<space/>

<wd l="8362" t="13253" r="9355" b="13397">vocabularies</wd>

<space/>

<wd l="9427" t="13253" r="9710" b="13397">due</wd>

<space/>

<wd l="9768" t="13272" r="9922" b="13397">to</wd>

<space/>

<wd l="9984" t="13253" r="10512" b="13397">lexical</wd>

<space/>

</ln>

<ln l="6149" t="13493" r="10517" b="13680" baseLine="13627" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="13493" r="6854" b="13637">variation</wd>

<space/>

<wd l="6912" t="13493" r="7834" b="13670">(Eisenstein,</wd>

<space/>

<wd l="7896" t="13493" r="8395" b="13670">2013).</wd>

<space/>

<wd l="8472" t="13493" r="9101" b="13680">Thirdly,</wd>

<space/>

<wd l="9158" t="13493" r="9403" b="13637">the</wd>

<space/>

<wd l="9456" t="13493" r="10171" b="13680">language</wd>

<space/>

<wd l="10229" t="13493" r="10517" b="13637">and</wd>

<space/>

</ln>

<ln l="6154" t="13733" r="10507" b="13920" baseLine="13867" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6154" t="13733" r="6941" b="13920">especially</wd>

<space/>

<wd l="7027" t="13733" r="7594" b="13877">entities</wd>

<space/>

<wd l="7675" t="13733" r="8146" b="13877">found</wd>

<space/>

<wd l="8222" t="13733" r="8376" b="13872">in</wd>

<space/>

<wd l="8453" t="13752" r="8957" b="13877">tweets</wd>

<space/>

<wd l="9048" t="13733" r="9605" b="13920">change</wd>

<space/>

<wd l="9686" t="13776" r="10032" b="13877">over</wd>

<space/>

<wd l="10109" t="13733" r="10507" b="13901">time,</wd>

<space/>

</ln>

<ln l="6149" t="13973" r="10512" b="14160" baseLine="14107">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6149" t="13973" r="6638" b="14117">which</wd>

<space/>

<wd l="6701" t="13973" r="6826" b="14117">is</wd>

<space/>

<wd l="6902" t="13973" r="7747" b="14160">commonly</wd>

<space/>

<wd l="7814" t="13973" r="8448" b="14117">referred</wd>

<space/>

<wd l="8510" t="13992" r="8664" b="14117">to</wd>

<space/>

<wd l="8736" t="14016" r="8894" b="14117">as</wd>

<space/>

</run>

<wd l="8962" t="13973" r="9346" b="14155"><run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">drift</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="9466" t="13973" r="9773" b="14117">The</wd>

<space/>

<wd l="9840" t="13973" r="10512" b="14160">majority</wd>

<space/>

</run>

</ln>

<ln l="6154" t="14208" r="10517" b="14395" baseLine="14347" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6154" t="14208" r="6326" b="14352">of</wd>

<space/>

<wd l="6384" t="14208" r="6624" b="14352">the</wd>

<space/>

<wd l="6696" t="14213" r="7296" b="14352">WNUT</wd>

<space/>

<wd l="7368" t="14208" r="7982" b="14395">training</wd>

<space/>

<wd l="8059" t="14208" r="8390" b="14352">data</wd>

<space/>

<wd l="8458" t="14208" r="8587" b="14352">is</wd>

<space/>

<wd l="8659" t="14208" r="9053" b="14352">from</wd>

<space/>

<wd l="9125" t="14208" r="9562" b="14376">2010,</wd>

<space/>

<wd l="9648" t="14208" r="9931" b="14352">and</wd>

<space/>

<wd l="10003" t="14208" r="10354" b="14395">only</wd>

<space/>

<wd l="10430" t="14251" r="10517" b="14352">a</wd>

<space/>

</ln>

<ln l="6158" t="14448" r="10507" b="14635" baseLine="14587" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6158" t="14448" r="6576" b="14592">small</wd>

<space/>

<wd l="6658" t="14467" r="7253" b="14592">amount</wd>

<space/>

<wd l="7320" t="14448" r="7709" b="14592">from</wd>

<space/>

<wd l="7781" t="14448" r="8218" b="14616">2015,</wd>

<space/>

<wd l="8304" t="14448" r="8885" b="14635">leading</wd>

<space/>

<wd l="8962" t="14467" r="9110" b="14592">to</wd>

<space/>

<wd l="9192" t="14491" r="9278" b="14592">a</wd>

<space/>

<wd l="9355" t="14448" r="9960" b="14635">sparsity</wd>

<space/>

<wd l="10037" t="14448" r="10190" b="14587">in</wd>

<space/>

<wd l="10267" t="14491" r="10507" b="14592">ex-</wd>

</ln>

<ln l="6154" t="14688" r="10512" b="14875" baseLine="14822" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6154" t="14688" r="6706" b="14870">amples</wd>

<space/>

<wd l="6763" t="14688" r="6931" b="14832">of</wd>

<space/>

<wd l="6965" t="14688" r="7579" b="14832">modern</wd>

<space/>

<wd l="7622" t="14688" r="8376" b="14875">language.</wd>

<space/>

<wd l="8448" t="14688" r="9278" b="14856">Therefore,</wd>

<space/>

<wd l="9331" t="14688" r="9485" b="14827">in</wd>

<space/>

<wd l="9533" t="14731" r="9797" b="14832">our</wd>

<space/>

<wd l="9840" t="14688" r="10512" b="14832">machine</wd>

<space/>

</ln>

<ln l="6149" t="14928" r="10512" b="15115" baseLine="15062" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="14928" r="6797" b="15115">learning</wd>

<space/>

<wd l="6874" t="14928" r="7642" b="15110">approach,</wd>

<space/>

<wd l="7718" t="14971" r="8155" b="15115">many</wd>

<space/>

<wd l="8227" t="14928" r="8400" b="15072">of</wd>

<space/>

<wd l="8458" t="14928" r="8698" b="15072">the</wd>

<space/>

<wd l="8765" t="14928" r="9389" b="15072">features</wd>

<space/>

<wd l="9461" t="14971" r="9691" b="15072">we</wd>

<space/>

<wd l="9763" t="14928" r="10512" b="15072">introduce</wd>

<space/>

</ln>

<ln l="6154" t="15168" r="8376" b="15355" baseLine="15302" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6154" t="15211" r="6389" b="15312">are</wd>

<space/>

<wd l="6442" t="15168" r="6840" b="15312">there</wd>

<space/>

<wd l="6888" t="15187" r="7042" b="15312">to</wd>

<space/>

<wd l="7099" t="15168" r="7685" b="15312">combat</wd>

<space/>

<wd l="7742" t="15168" r="8376" b="15355">sparsity.</wd>

</ln>

</para>

</column>

</section>

<section l="1440" t="15363" r="10531" b="16480">

<column l="1440" t="15363" r="10531" b="16480">

<para l="5800" t="15792" r="6143" b="15946" alignment="centered" spaceBefore="381" lsp="exactly" lspExact="249" language="en">

<ln l="5866" t="15792" r="6077" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="40">

<wd l="5866" t="15792" r="6077" b="15946">48</wd>

</ln>

</para>

<para l="2918" t="16133" r="8981" b="16469" alignment="centered" spaceBefore="139" lsp="exactly" lspExact="170" language="en">

<ln l="2918" t="16133" r="8981" b="16301" baseLine="16253" forcedEOF="true">

<run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2918" t="16133" r="3802" b="16296">Proceedings</wd>

<space/>

<wd l="3854" t="16133" r="4018" b="16296">of</wd>

<space/>

<wd l="4037" t="16133" r="4248" b="16262">the</wd>

<space/>

<wd l="4286" t="16138" r="4622" b="16262">ACL</wd>

<space/>

<wd l="4666" t="16133" r="5026" b="16262">2015</wd>

<space/>

<wd l="5078" t="16133" r="5779" b="16296">Workshop</wd>

<space/>

<wd l="5832" t="16176" r="6000" b="16262">on</wd>

<space/>

<wd l="6043" t="16138" r="6456" b="16296">Noisy</wd>

<space/>

<wd l="6518" t="16133" r="7627" b="16296">User-generated</wd>

<space/>

</run>

<wd l="7675" t="16138" r="7992" b="16286"><run italic="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Text</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="8045" t="16171" r="8443" b="16301">pages</wd>

<space/>

<wd l="8496" t="16133" r="8981" b="16286">48–53,
</wd>

</run>

</ln>

<ln l="3029" t="16296" r="8870" b="16469" baseLine="16425" forcedEOF="true">

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3029" t="16301" r="3595" b="16469">Beijing,</wd>

<space/>

<wd l="3653" t="16301" r="4114" b="16454">China,</wd>

<space/>

<wd l="4166" t="16301" r="4459" b="16469">July</wd>

<space/>

<wd l="4512" t="16301" r="4723" b="16454">31,</wd>

<space/>

<wd l="4781" t="16301" r="5170" b="16430">2015.</wd>

<space/>

</run>

<wd l="5246" t="16296" r="5770" b="16469"><run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">c</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">�</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">2015</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="5822" t="16301" r="6672" b="16430">Association</wd>

<space/>

<wd l="6715" t="16301" r="6926" b="16430">for</wd>

<space/>

<wd l="6974" t="16301" r="8035" b="16469">Computational</wd>

<space/>

<wd l="8078" t="16301" r="8870" b="16469">Linguistics</wd>

</run>

</ln>

</para>

</column>

</section>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4306.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1445" marginTop="1320" marginRight="1109" marginBottom="1302" offsetX="8" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1445" t="1320" r="10800" b="15408">

<column l="1445" t="1320" r="5822" b="15408">

<para l="1445" t="1363" r="3998" b="1546" alignment="left" lsp="exactly" lspExact="234" language="en">

<ln l="1445" t="1363" r="3998" b="1546" baseLine="1498" bold="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="5">

<wd l="1445" t="1363" r="1685" b="1507">3.1</wd>

<space/>

<wd l="1891" t="1363" r="3053" b="1546">Unsupervised</wd>

<space/>

<wd l="3110" t="1363" r="3998" b="1546">Clustering</wd>

</ln>

</para>

<para l="1445" t="1694" r="5818" b="6418" alignment="justified" spaceBefore="73" lsp="exactly" lspExact="240" language="en">

<ln l="1445" t="1694" r="5808" b="1882" baseLine="1829" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="1699" r="1699" b="1838">We</wd>

<space/>

<wd l="1752" t="1738" r="2016" b="1838">use</wd>

<space/>

<wd l="2069" t="1738" r="2246" b="1838">an</wd>

<space/>

<wd l="2299" t="1694" r="3350" b="1877">unsupervised</wd>

<space/>

<wd l="3403" t="1694" r="4176" b="1882">clustering</wd>

<space/>

<wd l="4234" t="1694" r="4406" b="1838">of</wd>

<space/>

<wd l="4440" t="1714" r="4877" b="1838">terms</wd>

<space/>

<wd l="4930" t="1714" r="5083" b="1838">to</wd>

<space/>

<wd l="5136" t="1714" r="5808" b="1882">generate</wd>

<space/>

</ln>

<ln l="1445" t="1934" r="5798" b="2122" baseLine="2069" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="1934" r="1853" b="2078">word</wd>

<space/>

<wd l="1915" t="1954" r="2256" b="2122">type</wd>

<space/>

<wd l="2328" t="1934" r="2995" b="2078">features.</wd>

<space/>

<wd l="3120" t="1934" r="3427" b="2078">The</wd>

<space/>

<wd l="3499" t="1934" r="3835" b="2122">goal</wd>

<space/>

<wd l="3907" t="1934" r="4080" b="2078">of</wd>

<space/>

<wd l="4138" t="1934" r="4421" b="2078">this</wd>

<space/>

<wd l="4493" t="1934" r="4618" b="2078">is</wd>

<space/>

<wd l="4690" t="1954" r="4843" b="2078">to</wd>

<space/>

<wd l="4915" t="1934" r="5256" b="2122">gain</wd>

<space/>

<wd l="5328" t="1978" r="5414" b="2078">a</wd>

<space/>

<wd l="5477" t="1978" r="5798" b="2117">pro-</wd>

</ln>

<ln l="1450" t="2170" r="5798" b="2357" baseLine="2309" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="2170" r="2088" b="2357">gressive</wd>

<space/>

<wd l="2160" t="2170" r="2914" b="2314">reduction</wd>

<space/>

<wd l="2986" t="2170" r="3144" b="2309">in</wd>

<space/>

<wd l="3216" t="2170" r="3456" b="2314">the</wd>

<space/>

<wd l="3533" t="2170" r="4301" b="2352">profusion</wd>

<space/>

<wd l="4378" t="2170" r="4546" b="2314">of</wd>

<space/>

<wd l="4613" t="2170" r="5021" b="2314">word</wd>

<space/>

<wd l="5093" t="2189" r="5506" b="2357">types</wd>

<space/>

<wd l="5587" t="2170" r="5798" b="2309">in-</wd>

</ln>

<ln l="1445" t="2410" r="5798" b="2597" baseLine="2549" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="2410" r="1939" b="2554">trinsic</wd>

<space/>

<wd l="1997" t="2429" r="2150" b="2554">to</wd>

<space/>

<wd l="2213" t="2410" r="2453" b="2554">the</wd>

<space/>

<wd l="2510" t="2429" r="2808" b="2554">text</wd>

<space/>

<wd l="2866" t="2429" r="3245" b="2597">type.</wd>

<space/>

<wd l="3346" t="2410" r="3638" b="2554">250</wd>

<space/>

<wd l="3701" t="2410" r="4277" b="2554">million</wd>

<space/>

<wd l="4330" t="2429" r="4834" b="2554">tweets</wd>

<space/>

<wd l="4901" t="2410" r="5290" b="2554">from</wd>

<space/>

<wd l="5347" t="2410" r="5798" b="2554">2010-</wd>

<space/>

</ln>

<ln l="1450" t="2650" r="5803" b="2837" baseLine="2784" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="2650" r="1838" b="2794">2012</wd>

<space/>

<wd l="1910" t="2693" r="2294" b="2794">were</wd>

<space/>

<wd l="2366" t="2650" r="2731" b="2794">used</wd>

<space/>

<wd l="2798" t="2669" r="2947" b="2794">to</wd>

<space/>

<wd l="3024" t="2669" r="3691" b="2837">generate</wd>

<space/>

<wd l="3768" t="2650" r="4210" b="2818">2,000</wd>

<space/>

<wd l="4282" t="2650" r="4690" b="2794">word</wd>

<space/>

<wd l="4762" t="2650" r="5304" b="2794">classes</wd>

<space/>

<wd l="5376" t="2650" r="5803" b="2837">using</wd>

<space/>

</ln>

<ln l="1445" t="2890" r="5803" b="3077" baseLine="3024" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="2894" r="1982" b="3034">Brown</wd>

<space/>

<wd l="2030" t="2890" r="2808" b="3077">clustering</wd>

<space/>

<wd l="2870" t="2890" r="3466" b="3067">(Brown</wd>

<space/>

<wd l="3514" t="2909" r="3658" b="3034">et</wd>

<space/>

<wd l="3706" t="2890" r="3936" b="3058">al.,</wd>

<space/>

<wd l="4008" t="2890" r="4493" b="3067">1992).</wd>

<space/>

<wd l="4565" t="2890" r="5309" b="3077">Typically</wd>

<space/>

<wd l="5381" t="2890" r="5803" b="3058">1,000</wd>

<space/>

</ln>

<ln l="1450" t="3130" r="5798" b="3317" baseLine="3264" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="3173" r="1608" b="3274">or</wd>

<space/>

<wd l="1646" t="3130" r="2098" b="3274">fewer</wd>

<space/>

<wd l="2141" t="3173" r="2376" b="3274">are</wd>

<space/>

<wd l="2419" t="3130" r="2832" b="3298">used;</wd>

<space/>

<wd l="2880" t="3130" r="3120" b="3274">the</wd>

<space/>

<wd l="3163" t="3130" r="3624" b="3317">larger</wd>

<space/>

<wd l="3662" t="3130" r="4272" b="3274">number</wd>

<space/>

<wd l="4315" t="3130" r="4488" b="3274">of</wd>

<space/>

<wd l="4522" t="3130" r="5064" b="3274">classes</wd>

<space/>

<wd l="5107" t="3173" r="5410" b="3274">was</wd>

<space/>

<wd l="5458" t="3130" r="5798" b="3274">cho-</wd>

</ln>

<ln l="1454" t="3365" r="5818" b="3552" baseLine="3504" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1454" t="3408" r="1709" b="3509">sen</wd>

<space/>

<wd l="1781" t="3365" r="2410" b="3509">because</wd>

<space/>

<wd l="2486" t="3365" r="2602" b="3509">it</wd>

<space/>

<wd l="2674" t="3365" r="3389" b="3552">helpfully</wd>

<space/>

<wd l="3466" t="3365" r="4219" b="3509">increased</wd>

<space/>

<wd l="4296" t="3365" r="4536" b="3509">the</wd>

<space/>

<wd l="4618" t="3365" r="5563" b="3552">expressivity</wd>

<space/>

<wd l="5645" t="3365" r="5818" b="3509">of</wd>

<space/>

</ln>

<ln l="1445" t="3605" r="5798" b="3792" baseLine="3744" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="3605" r="1685" b="3749">the</wd>

<space/>

<wd l="1738" t="3605" r="2870" b="3787">representation</wd>

<space/>

<wd l="2928" t="3605" r="3888" b="3792">(Derczynski</wd>

<space/>

<wd l="3950" t="3624" r="4090" b="3749">et</wd>

<space/>

<wd l="4147" t="3605" r="4378" b="3773">al.,</wd>

<space/>

<wd l="4440" t="3605" r="5035" b="3782">2015a),</wd>

<space/>

<wd l="5093" t="3605" r="5534" b="3749">while</wd>

<space/>

<wd l="5587" t="3648" r="5798" b="3749">re-</wd>

</ln>

<ln l="1445" t="3845" r="5808" b="4032" baseLine="3979" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="3845" r="1992" b="4032">taining</wd>

<space/>

<wd l="2064" t="3888" r="2150" b="3989">a</wd>

<space/>

<wd l="2208" t="3845" r="2693" b="3989">useful</wd>

<space/>

<wd l="2765" t="3845" r="3370" b="4032">sparsity</wd>

<space/>

<wd l="3432" t="3845" r="4224" b="3989">reduction.</wd>

<space/>

<wd l="4330" t="3845" r="4805" b="3989">These</wd>

<space/>

<wd l="4867" t="3845" r="5808" b="3989">hierarchical</wd>

<space/>

</ln>

<ln l="1450" t="4085" r="5798" b="4272" baseLine="4219" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="4085" r="1992" b="4229">classes</wd>

<space/>

<wd l="2069" t="4128" r="2453" b="4229">were</wd>

<space/>

<wd l="2530" t="4085" r="3451" b="4267">represented</wd>

<space/>

<wd l="3523" t="4085" r="3950" b="4272">using</wd>

<space/>

<wd l="4027" t="4085" r="4243" b="4229">bit</wd>

<space/>

<wd l="4320" t="4085" r="4829" b="4267">depths</wd>

<space/>

<wd l="4910" t="4085" r="5083" b="4229">of</wd>

<space/>

<wd l="5150" t="4085" r="5510" b="4229">3-10</wd>

<space/>

<wd l="5587" t="4085" r="5798" b="4224">in-</wd>

</ln>

<ln l="1450" t="4325" r="5808" b="4507" baseLine="4459" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="4325" r="2045" b="4493">clusive,</wd>

<space/>

<wd l="2102" t="4325" r="2390" b="4469">and</wd>

<space/>

<wd l="2438" t="4325" r="2779" b="4469">then</wd>

<space/>

<wd l="2851" t="4325" r="3072" b="4493">12,</wd>

<space/>

<wd l="3149" t="4325" r="3370" b="4493">14,</wd>

<space/>

<wd l="3446" t="4325" r="3672" b="4493">16,</wd>

<space/>

<wd l="3744" t="4325" r="3917" b="4469">18</wd>

<space/>

<wd l="3979" t="4325" r="4267" b="4469">and</wd>

<space/>

<wd l="4320" t="4325" r="4555" b="4493">20,</wd>

<space/>

<wd l="4618" t="4368" r="4901" b="4469">one</wd>

<space/>

<wd l="4949" t="4325" r="5501" b="4469">feature</wd>

<space/>

<wd l="5554" t="4368" r="5808" b="4507">per</wd>

<space/>

</ln>

<ln l="1450" t="4565" r="5808" b="4752" baseLine="4699" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="4565" r="1925" b="4747">depth.</wd>

<space/>

<wd l="2050" t="4565" r="2357" b="4709">The</wd>

<space/>

<wd l="2424" t="4565" r="2966" b="4752">typical</wd>

<space/>

<wd l="3038" t="4565" r="3490" b="4709">levels</wd>

<space/>

<wd l="3566" t="4608" r="3802" b="4709">are</wd>

<space/>

<wd l="3869" t="4565" r="4013" b="4733">4,</wd>

<space/>

<wd l="4094" t="4565" r="4234" b="4733">6,</wd>

<space/>

<wd l="4330" t="4565" r="4507" b="4709">10</wd>

<space/>

<wd l="4584" t="4565" r="4867" b="4709">and</wd>

<space/>

<wd l="4939" t="4565" r="5174" b="4733">20,</wd>

<space/>

<wd l="5256" t="4565" r="5808" b="4752">though</wd>

<space/>

</ln>

<ln l="1454" t="4800" r="5808" b="4987" baseLine="4939" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1454" t="4800" r="2150" b="4944">selection</wd>

<space/>

<wd l="2203" t="4800" r="2376" b="4944">of</wd>

<space/>

<wd l="2410" t="4800" r="2626" b="4944">bit</wd>

<space/>

<wd l="2674" t="4800" r="3182" b="4982">depths</wd>

<space/>

<wd l="3235" t="4819" r="3389" b="4944">to</wd>

<space/>

<wd l="3437" t="4843" r="3701" b="4944">use</wd>

<space/>

<wd l="3758" t="4800" r="4162" b="4944">often</wd>

<space/>

<wd l="4210" t="4800" r="4680" b="4987">yields</wd>

<space/>

<wd l="4733" t="4800" r="5208" b="4944">brittle</wd>

<space/>

<wd l="5256" t="4800" r="5808" b="4944">feature</wd>

<space/>

</ln>

<ln l="1454" t="5040" r="5798" b="5218" baseLine="5179" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1454" t="5059" r="1738" b="5184">sets</wd>

<space/>

<wd l="1814" t="5040" r="2203" b="5218">(Koo</wd>

<space/>

<wd l="2275" t="5059" r="2414" b="5184">et</wd>

<space/>

<wd l="2482" t="5040" r="2717" b="5208">al.,</wd>

<space/>

<wd l="2794" t="5040" r="3298" b="5218">2008),</wd>

<space/>

<wd l="3374" t="5040" r="3658" b="5184">and</wd>

<space/>

<wd l="3730" t="5083" r="3893" b="5184">so</wd>

<space/>

<wd l="3960" t="5083" r="4190" b="5184">we</wd>

<space/>

<wd l="4258" t="5040" r="4666" b="5184">leave</wd>

<space/>

<wd l="4733" t="5040" r="4848" b="5184">it</wd>

<space/>

<wd l="4906" t="5059" r="5059" b="5184">to</wd>

<space/>

<wd l="5126" t="5040" r="5366" b="5184">the</wd>

<space/>

<wd l="5438" t="5040" r="5798" b="5184">clas-</wd>

</ln>

<ln l="1454" t="5280" r="5803" b="5424" baseLine="5414" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1454" t="5280" r="1843" b="5424">sifier</wd>

<space/>

<wd l="1910" t="5299" r="2059" b="5424">to</wd>

<space/>

<wd l="2136" t="5280" r="2650" b="5424">decide</wd>

<space/>

<wd l="2717" t="5280" r="3206" b="5424">which</wd>

<space/>

<wd l="3278" t="5323" r="3634" b="5424">ones</wd>

<space/>

<wd l="3710" t="5323" r="3946" b="5424">are</wd>

<space/>

<wd l="4018" t="5280" r="4541" b="5424">useful.</wd>

<space/>

<wd l="4666" t="5280" r="5141" b="5424">These</wd>

<space/>

<wd l="5218" t="5280" r="5803" b="5424">choices</wd>

<space/>

</ln>

<ln l="1450" t="5520" r="5803" b="5707" baseLine="5654" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="5563" r="1685" b="5664">are</wd>

<space/>

<wd l="1771" t="5520" r="2539" b="5664">examined</wd>

<space/>

<wd l="2616" t="5520" r="2774" b="5659">in</wd>

<space/>

<wd l="2856" t="5563" r="3120" b="5664">our</wd>

<space/>

<wd l="3197" t="5520" r="4243" b="5702">post-exercise</wd>

<space/>

<wd l="4325" t="5520" r="5414" b="5707">investigations</wd>

<space/>

<wd l="5496" t="5520" r="5803" b="5664">into</wd>

<space/>

</ln>

<ln l="1445" t="5755" r="5808" b="5942" baseLine="5894" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="5760" r="1685" b="5904">the</wd>

<space/>

<wd l="1747" t="5760" r="2285" b="5928">model,</wd>

<space/>

<wd l="2362" t="5760" r="2952" b="5904">Section</wd>

<space/>

<wd l="3014" t="5760" r="3302" b="5928">5.1,</wd>

<space/>

<wd l="3379" t="5760" r="3662" b="5904">and</wd>

<space/>

<wd l="3720" t="5760" r="3960" b="5904">the</wd>

<space/>

<wd l="4027" t="5760" r="4622" b="5904">clusters</wd>

<space/>

<wd l="4690" t="5760" r="5395" b="5942">provided</wd>

<space/>

<wd l="5453" t="5760" r="5808" b="5904">with</wd>

<space/>

</ln>

<ln l="1445" t="5995" r="5803" b="6182" baseLine="6134" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="5995" r="1723" b="6139">this</wd>

<space/>

<wd l="1810" t="6038" r="2275" b="6178">paper.</wd>

<space/>

<wd l="2434" t="5995" r="3024" b="6182">Finally,</wd>

<space/>

<wd l="3115" t="6038" r="3346" b="6139">we</wd>

<space/>

<wd l="3432" t="5995" r="3744" b="6139">also</wd>

<space/>

<wd l="3821" t="5995" r="4406" b="6139">include</wd>

<space/>

<wd l="4488" t="5995" r="4728" b="6139">the</wd>

<space/>

<wd l="4805" t="6000" r="5347" b="6139">Brown</wd>

<space/>

<wd l="5424" t="5995" r="5803" b="6139">class</wd>

<space/>

</ln>

<ln l="1445" t="6235" r="3701" b="6418" baseLine="6374" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="6235" r="1858" b="6418">paths</wd>

<space/>

<wd l="1915" t="6235" r="2146" b="6379">for</wd>

<space/>

<wd l="2194" t="6235" r="2438" b="6379">the</wd>

<space/>

<wd l="2486" t="6235" r="3163" b="6418">previous</wd>

<space/>

<wd l="3221" t="6235" r="3701" b="6379">token.</wd>

</ln>

</para>

<para l="1445" t="6485" r="5808" b="8582" alignment="justified" fli="144" lsp="exactly" lspExact="240" language="en">

<ln l="1642" t="6485" r="5803" b="6672" baseLine="6624" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="6490" r="1843" b="6629">To</wd>

<space/>

<wd l="1910" t="6485" r="2150" b="6629">aid</wd>

<space/>

<wd l="2208" t="6485" r="2366" b="6624">in</wd>

<space/>

<wd l="2429" t="6485" r="3053" b="6672">filtering</wd>

<space/>

<wd l="3120" t="6504" r="3370" b="6629">out</wd>

<space/>

<wd l="3432" t="6528" r="4128" b="6629">common</wd>

<space/>

<wd l="4186" t="6485" r="4699" b="6629">tokens</wd>

<space/>

<wd l="4766" t="6485" r="5054" b="6629">and</wd>

<space/>

<wd l="5112" t="6485" r="5803" b="6672">reducing</wd>

<space/>

</ln>

<ln l="1445" t="6725" r="5808" b="6912" baseLine="6859" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="6725" r="1685" b="6869">the</wd>

<space/>

<wd l="1762" t="6725" r="2309" b="6907">impact</wd>

<space/>

<wd l="2381" t="6725" r="2717" b="6912">they</wd>

<space/>

<wd l="2798" t="6768" r="3139" b="6912">may</wd>

<space/>

<wd l="3221" t="6725" r="3586" b="6869">have</wd>

<space/>

<wd l="3672" t="6768" r="3826" b="6869">as</wd>

<space/>

<wd l="3912" t="6768" r="4186" b="6912">e.g.</wd>

<space/>

<wd l="4349" t="6725" r="5006" b="6907">spurious</wd>

<space/>

<wd l="5093" t="6744" r="5808" b="6912">gazetteer</wd>

<space/>

</ln>

<ln l="1445" t="6965" r="5808" b="7152" baseLine="7099" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="6965" r="2141" b="7133">matches,</wd>

<space/>

<wd l="2242" t="7008" r="2472" b="7109">we</wd>

<space/>

<wd l="2563" t="6965" r="3466" b="7147">incorporate</wd>

<space/>

<wd l="3562" t="7008" r="3648" b="7109">a</wd>

<space/>

<wd l="3734" t="6984" r="4104" b="7109">term</wd>

<space/>

<wd l="4186" t="6965" r="4978" b="7152">frequency</wd>

<space/>

<wd l="5069" t="6965" r="5458" b="7109">from</wd>

<space/>

<wd l="5549" t="7008" r="5808" b="7109">our</wd>

<space/>

</ln>

<ln l="1445" t="7205" r="5808" b="7392" baseLine="7339" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="7205" r="2160" b="7392">language</wd>

<space/>

<wd l="2237" t="7205" r="2774" b="7349">model.</wd>

<space/>

<wd l="2918" t="7205" r="3269" b="7349">This</wd>

<space/>

<wd l="3350" t="7205" r="3475" b="7349">is</wd>

<space/>

<wd l="3562" t="7205" r="4142" b="7387">applied</wd>

<space/>

<wd l="4214" t="7224" r="4368" b="7349">to</wd>

<space/>

<wd l="4445" t="7224" r="4882" b="7349">terms</wd>

<space/>

<wd l="4963" t="7205" r="5266" b="7349">that</wd>

<space/>

<wd l="5342" t="7248" r="5578" b="7349">are</wd>

<space/>

<wd l="5654" t="7205" r="5808" b="7344">in</wd>

<space/>

</ln>

<ln l="1445" t="7445" r="5808" b="7632" baseLine="7579" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="7445" r="1685" b="7589">the</wd>

<space/>

<wd l="1757" t="7464" r="2006" b="7627">top</wd>

<space/>

<wd l="2083" t="7445" r="2621" b="7613">50,000</wd>

<space/>

<wd l="2698" t="7445" r="3163" b="7589">found</wd>

<space/>

<wd l="3230" t="7445" r="3384" b="7584">in</wd>

<space/>

<wd l="3456" t="7488" r="3720" b="7589">our</wd>

<space/>

<wd l="3792" t="7445" r="4330" b="7632">garden</wd>

<space/>

<wd l="4397" t="7445" r="4762" b="7589">hose</wd>

<space/>

<wd l="4843" t="7445" r="5438" b="7627">sample,</wd>

<space/>

<wd l="5525" t="7445" r="5808" b="7589">and</wd>

<space/>

</ln>

<ln l="1445" t="7680" r="5798" b="7867" baseLine="7819" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="7680" r="2362" b="7862">represented</wd>

<space/>

<wd l="2434" t="7723" r="2587" b="7824">as</wd>

<space/>

<wd l="2664" t="7723" r="2750" b="7824">a</wd>

<space/>

<wd l="2818" t="7680" r="3365" b="7824">feature</wd>

<space/>

<wd l="3437" t="7680" r="3970" b="7867">having</wd>

<space/>

<wd l="4046" t="7723" r="4133" b="7824">a</wd>

<space/>

<wd l="4195" t="7680" r="4622" b="7824">value</wd>

<space/>

<wd l="4699" t="7680" r="5189" b="7824">scaled</wd>

<space/>

<wd l="5256" t="7680" r="5410" b="7819">in</wd>

<space/>

<wd l="5477" t="7723" r="5798" b="7862">pro-</wd>

</ln>

<ln l="1445" t="7920" r="5803" b="8107" baseLine="8059" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="7920" r="2021" b="8102">portion</wd>

<space/>

<wd l="2083" t="7939" r="2232" b="8064">to</wd>

<space/>

<wd l="2304" t="7920" r="2544" b="8064">the</wd>

<space/>

<wd l="2611" t="7920" r="3101" b="8064">term’s</wd>

<space/>

<wd l="3173" t="7920" r="3758" b="8064">relative</wd>

<space/>

<wd l="3826" t="7920" r="4651" b="8107">frequency,</wd>

<space/>

<wd l="4723" t="7920" r="5544" b="8102">multiplied</wd>

<space/>

<wd l="5611" t="7920" r="5803" b="8107">by</wd>

<space/>

</ln>

<ln l="1464" t="8160" r="5808" b="8304" baseLine="8294" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1464" t="8160" r="1738" b="8304">100</wd>

<space/>

<wd l="1790" t="8179" r="1944" b="8304">to</wd>

<space/>

<wd l="1997" t="8160" r="2525" b="8304">reduce</wd>

<space/>

<wd l="2578" t="8160" r="3451" b="8304">underflows</wd>

<space/>

<wd l="3509" t="8160" r="3792" b="8304">and</wd>

<space/>

<wd l="3850" t="8203" r="4358" b="8304">ensure</wd>

<space/>

<wd l="4411" t="8160" r="4526" b="8304">it</wd>

<space/>

<wd l="4570" t="8160" r="4834" b="8304">has</wd>

<space/>

<wd l="4891" t="8203" r="5069" b="8304">an</wd>

<space/>

<wd l="5131" t="8160" r="5808" b="8304">effective</wd>

<space/>

</ln>

<ln l="1445" t="8400" r="2026" b="8582" baseLine="8534" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="8400" r="2026" b="8582">impact.</wd>

</ln>

</para>

<para l="1445" t="8837" r="4214" b="9019" alignment="left" spaceBefore="205" lsp="exactly" lspExact="235" language="en">

<ln l="1445" t="8837" r="4214" b="9019" baseLine="8971" bold="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="4">

<wd l="1445" t="8837" r="1690" b="8981">3.2</wd>

<space/>

<wd l="1891" t="8837" r="3437" b="9019">Morpho-Syntactic</wd>

<space/>

<wd l="3490" t="8837" r="4214" b="8981">Features</wd>

</ln>

</para>

<para l="1445" t="9168" r="5813" b="10306" alignment="justified" spaceBefore="87" lsp="exactly" lspExact="240" language="en">

<ln l="1445" t="9168" r="5808" b="9355" baseLine="9302" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="9173" r="1646" b="9312">To</wd>

<space/>

<wd l="1738" t="9168" r="2232" b="9312">model</wd>

<space/>

<wd l="2333" t="9187" r="2952" b="9336">context,</wd>

<space/>

<wd l="3058" t="9211" r="3288" b="9312">we</wd>

<space/>

<wd l="3384" t="9168" r="3749" b="9312">used</wd>

<space/>

<wd l="3835" t="9168" r="4694" b="9355">reasonably</wd>

<space/>

<wd l="4795" t="9168" r="5808" b="9312">conventional</wd>

<space/>

</ln>

<ln l="1445" t="9403" r="5813" b="9590" baseLine="9542" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="9403" r="2112" b="9547">features:</wd>

<space/>

<wd l="2261" t="9403" r="2501" b="9547">the</wd>

<space/>

<wd l="2587" t="9403" r="3029" b="9547">token</wd>

<space/>

<wd l="3110" t="9403" r="3552" b="9571">itself,</wd>

<space/>

<wd l="3653" t="9403" r="3893" b="9547">the</wd>

<space/>

<wd l="3979" t="9403" r="4291" b="9547">uni-</wd>

<space/>

<wd l="4387" t="9403" r="4670" b="9547">and</wd>

<space/>

<wd l="4757" t="9403" r="5390" b="9590">bigrams</wd>

<space/>

<wd l="5482" t="9403" r="5635" b="9542">in</wd>

<space/>

<wd l="5726" t="9446" r="5813" b="9547">a</wd>

<space/>

</ln>

<ln l="1464" t="9629" r="5808" b="9835" baseLine="9782">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><wd l="1464" t="9629" r="1795" b="9835">[−2,</wd>

<space/>

<wd l="1853" t="9629" r="1973" b="9835">2]</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2050" t="9643" r="2496" b="9787">offset</wd>

<space/>

<wd l="2544" t="9643" r="3178" b="9787">window</wd>

<space/>

<wd l="3230" t="9643" r="3624" b="9787">from</wd>

<space/>

<wd l="3667" t="9643" r="3912" b="9787">the</wd>

<space/>

<wd l="3965" t="9662" r="4526" b="9787">current</wd>

<space/>

<wd l="4574" t="9643" r="5059" b="9811">token,</wd>

<space/>

<wd l="5122" t="9643" r="5405" b="9787">and</wd>

<space/>

<wd l="5453" t="9643" r="5808" b="9787">both</wd>

<space/>

</run>

</ln>

<ln l="1445" t="9883" r="5808" b="10070" baseLine="10018">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1445" t="9883" r="2304" b="10066">wordshape</wd>

<space/>

<wd l="2362" t="9883" r="2693" b="10070">(e.g.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2760" t="9883" r="3370" b="10027">London</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3422" t="9883" r="4114" b="10027">becomes</wd>

<space/>

</run>

<wd l="4157" t="9883" r="4786" b="10061"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Xxxxxx</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">)</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="4848" t="9883" r="5131" b="10027">and</wd>

<space/>

<wd l="5179" t="9883" r="5808" b="10027">reduced</wd>

<space/>

</run>

</ln>

<ln l="1445" t="10123" r="4277" b="10306" baseLine="10258">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1445" t="10123" r="2304" b="10306">wordshape</wd>

<space/>

</run>

<wd l="2362" t="10123" r="3024" b="10301"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">London</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3077" t="10142" r="3230" b="10267">to</wd>

<space/>

</run>

<wd l="3274" t="10123" r="3552" b="10301"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Xx</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">)</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="3610" t="10123" r="4277" b="10267">features.</wd>

</run>

</ln>

</para>

<para l="1445" t="10373" r="5813" b="11275" alignment="justified" spaceBefore="6" fli="144" lsp="exactly" lspExact="240" language="en">

<ln l="1642" t="10373" r="5798" b="10560" baseLine="10507" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1642" t="10378" r="1901" b="10517">We</wd>

<space/>

<wd l="1949" t="10373" r="2261" b="10517">also</wd>

<space/>

<wd l="2304" t="10373" r="2995" b="10517">included</wd>

<space/>

<wd l="3038" t="10416" r="3125" b="10517">a</wd>

<space/>

<wd l="3163" t="10373" r="4315" b="10555">part-of-speech</wd>

<space/>

<wd l="4354" t="10392" r="4594" b="10560">tag</wd>

<space/>

<wd l="4637" t="10373" r="4872" b="10517">for</wd>

<space/>

<wd l="4915" t="10373" r="5280" b="10517">each</wd>

<space/>

<wd l="5318" t="10373" r="5798" b="10517">token.</wd>

<space/>

</ln>

<ln l="1445" t="10613" r="5813" b="10800" baseLine="10747" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="10613" r="1915" b="10757">These</wd>

<space/>

<wd l="1968" t="10656" r="2352" b="10757">were</wd>

<space/>

<wd l="2410" t="10613" r="3485" b="10800">automatically</wd>

<space/>

<wd l="3542" t="10613" r="4315" b="10800">generated</wd>

<space/>

<wd l="4363" t="10613" r="4560" b="10800">by</wd>

<space/>

<wd l="4618" t="10656" r="4704" b="10757">a</wd>

<space/>

<wd l="4757" t="10632" r="5328" b="10757">custom</wd>

<space/>

<wd l="5376" t="10632" r="5813" b="10757">tweet</wd>

<space/>

</ln>

<ln l="1445" t="10848" r="5798" b="11035" baseLine="10987" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="10848" r="1752" b="10992">PoS</wd>

<space/>

<wd l="1819" t="10867" r="2318" b="11035">tagger</wd>

<space/>

<wd l="2371" t="10848" r="2803" b="11035">using</wd>

<space/>

<wd l="2866" t="10891" r="3043" b="10992">an</wd>

<space/>

<wd l="3110" t="10848" r="3864" b="10992">extension</wd>

<space/>

<wd l="3926" t="10848" r="4094" b="10992">of</wd>

<space/>

<wd l="4142" t="10848" r="4382" b="10992">the</wd>

<space/>

<wd l="4440" t="10853" r="4790" b="10987">PTB</wd>

<space/>

<wd l="4862" t="10867" r="5328" b="11035">tagset</wd>

<space/>

<wd l="5390" t="10848" r="5798" b="11026">(Der-</wd>

</ln>

<ln l="1450" t="11088" r="3192" b="11275" baseLine="11227" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="11088" r="2050" b="11275">czynski</wd>

<space/>

<wd l="2107" t="11107" r="2246" b="11232">et</wd>

<space/>

<wd l="2299" t="11088" r="2530" b="11256">al.,</wd>

<space/>

<wd l="2592" t="11088" r="3192" b="11266">2013b).</wd>

</ln>

</para>

<para l="1450" t="11338" r="5803" b="11770" alignment="justified" spaceBefore="16" fli="144" lsp="exactly" lspExact="239" language="en">

<ln l="1642" t="11338" r="5803" b="11525" baseLine="11477" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="11342" r="1843" b="11482">To</wd>

<space/>

<wd l="1910" t="11357" r="2491" b="11520">capture</wd>

<space/>

<wd l="2558" t="11338" r="3566" b="11525">orthographic</wd>

<space/>

<wd l="3629" t="11338" r="4613" b="11506">information,</wd>

<space/>

<wd l="4680" t="11381" r="4910" b="11482">we</wd>

<space/>

<wd l="4973" t="11338" r="5299" b="11482">take</wd>

<space/>

<wd l="5371" t="11338" r="5803" b="11482">suffix</wd>

<space/>

</ln>

<ln l="1450" t="11563" r="4200" b="11770" baseLine="11717">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1450" t="11578" r="1733" b="11722">and</wd>

<space/>

<wd l="1781" t="11578" r="2242" b="11760">prefix</wd>

<space/>

<wd l="2294" t="11578" r="2918" b="11722">features</wd>

<space/>

<wd l="2981" t="11578" r="3154" b="11722">of</wd>

<space/>

<wd l="3192" t="11578" r="3691" b="11765">length</wd>

<space/>

</run>

<wd l="3758" t="11563" r="4200" b="11770"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0">[1..3]</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

<para l="1445" t="11827" r="5808" b="13210" alignment="justified" fli="144" lsp="exactly" lspExact="240" language="en">

<ln l="1646" t="11827" r="5803" b="12014" baseLine="11966" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1646" t="11827" r="2750" b="12010">Capitalisation</wd>

<space/>

<wd l="2851" t="11827" r="2976" b="11971">is</wd>

<space/>

<wd l="3086" t="11827" r="3989" b="12014">notoriously</wd>

<space/>

<wd l="4094" t="11827" r="4891" b="11971">unreliable</wd>

<space/>

<wd l="4992" t="11827" r="5150" b="11966">in</wd>

<space/>

<wd l="5251" t="11846" r="5803" b="11995">tweets,</wd>

<space/>

</ln>

<ln l="1450" t="12067" r="5803" b="12254" baseLine="12202" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1450" t="12067" r="1733" b="12211">and</wd>

<space/>

<wd l="1858" t="12067" r="2170" b="12211">also</wd>

<space/>

<wd l="2299" t="12067" r="2702" b="12211">often</wd>

<space/>

<wd l="2827" t="12067" r="3581" b="12211">overfitted</wd>

<space/>

<wd l="3701" t="12086" r="3854" b="12211">to</wd>

<space/>

<wd l="3979" t="12067" r="4171" b="12254">by</wd>

<space/>

<wd l="4296" t="12067" r="5054" b="12211">newswire</wd>

<space/>

<wd l="5189" t="12086" r="5803" b="12254">systems</wd>

<space/>

</ln>

<ln l="1445" t="12307" r="5808" b="12451" baseLine="12442" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="12307" r="1997" b="12451">trained</wd>

<space/>

<wd l="2045" t="12350" r="2242" b="12451">on</wd>

<space/>

<wd l="2290" t="12350" r="2693" b="12451">more</wd>

<space/>

<wd l="2746" t="12307" r="3504" b="12451">canonical</wd>

<space/>

<wd l="3552" t="12307" r="4013" b="12451">forms</wd>

<space/>

<wd l="4066" t="12307" r="4238" b="12451">of</wd>

<space/>

<wd l="4277" t="12326" r="4608" b="12451">text.</wd>

<space/>

<wd l="4680" t="12312" r="4882" b="12451">To</wd>

<space/>

<wd l="4934" t="12350" r="5347" b="12451">wean</wd>

<space/>

<wd l="5400" t="12307" r="5808" b="12451">these</wd>

<space/>

</ln>

<ln l="1454" t="12547" r="5808" b="12734" baseLine="12682" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1454" t="12566" r="2069" b="12734">systems</wd>

<space/>

<wd l="2150" t="12590" r="2558" b="12734">away</wd>

<space/>

<wd l="2635" t="12547" r="3024" b="12691">from</wd>

<space/>

<wd l="3101" t="12547" r="3696" b="12730">capitals</wd>

<space/>

<wd l="3778" t="12547" r="4214" b="12691">while</wd>

<space/>

<wd l="4291" t="12547" r="4762" b="12734">trying</wd>

<space/>

<wd l="4838" t="12566" r="4992" b="12691">to</wd>

<space/>

<wd l="5069" t="12547" r="5808" b="12691">minimise</wd>

<space/>

</ln>

<ln l="1445" t="12782" r="5803" b="12970" baseLine="12922" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="12782" r="1814" b="12926">false</wd>

<space/>

<wd l="1882" t="12782" r="2669" b="12970">negatives,</wd>

<space/>

<wd l="2741" t="12826" r="2971" b="12926">we</wd>

<space/>

<wd l="3043" t="12782" r="3408" b="12926">used</wd>

<space/>

<wd l="3475" t="12782" r="4723" b="12926">case-insensitive</wd>

<space/>

<wd l="4795" t="12802" r="5582" b="12970">gazetteers</wd>

<space/>

<wd l="5654" t="12802" r="5803" b="12926">to</wd>

<space/>

</ln>

<ln l="1450" t="13022" r="3605" b="13210" baseLine="13162" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1450" t="13042" r="2117" b="13210">generate</wd>

<space/>

<wd l="2174" t="13042" r="2885" b="13210">gazetteer</wd>

<space/>

<wd l="2938" t="13022" r="3605" b="13166">features.</wd>

</ln>

</para>

<para l="1445" t="13459" r="2794" b="13603" alignment="left" spaceBefore="200" lsp="exactly" lspExact="235" language="en">

<ln l="1445" t="13459" r="2794" b="13603" baseLine="13594" bold="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="9">

<wd l="1445" t="13459" r="1690" b="13603">3.3</wd>

<space/>

<wd l="1896" t="13459" r="2794" b="13603">Gazetteers</wd>

</ln>

</para>

<para l="1445" t="13790" r="5818" b="14218" alignment="justified" spaceBefore="88" lsp="exactly" lspExact="240" language="en">

<ln l="1445" t="13790" r="5818" b="13978" baseLine="13925" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="13790" r="1930" b="13934">While</wd>

<space/>

<wd l="1978" t="13834" r="2208" b="13934">we</wd>

<space/>

<wd l="2266" t="13790" r="2981" b="13934">collected</wd>

<space/>

<wd l="3038" t="13790" r="3322" b="13934">and</wd>

<space/>

<wd l="3374" t="13790" r="4454" b="13973">experimented</wd>

<space/>

<wd l="4502" t="13790" r="4858" b="13934">with</wd>

<space/>

<wd l="4910" t="13834" r="4997" b="13934">a</wd>

<space/>

<wd l="5045" t="13790" r="5587" b="13978">variety</wd>

<space/>

<wd l="5645" t="13790" r="5818" b="13934">of</wd>

<space/>

</ln>

<ln l="1450" t="14030" r="4522" b="14218" baseLine="14165" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1450" t="14050" r="2280" b="14218">gazetteers,</wd>

<space/>

<wd l="2338" t="14030" r="2578" b="14174">the</wd>

<space/>

<wd l="2630" t="14050" r="3019" b="14174">most</wd>

<space/>

<wd l="3067" t="14030" r="3629" b="14213">helpful</wd>

<space/>

<wd l="3686" t="14074" r="4042" b="14174">ones</wd>

<space/>

<wd l="4099" t="14074" r="4522" b="14174">were:</wd>

</ln>

</para>

<para l="1651" t="14482" r="5798" b="14914" alignment="justified" li="360" spaceBefore="216" fli="-216" lsp="exactly" lspExact="240" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="1651" t="14486" r="5798" b="14674" baseLine="14621" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="14491" r="1843" b="14621">•</wd>

<tab position="1724"/>

<wd l="1843" t="14486" r="2549" b="14630">Freebase</wd>

<space/>

<wd l="2664" t="14506" r="3451" b="14674">gazetteers</wd>

<space/>

<wd l="3566" t="14486" r="4066" b="14630">mined</wd>

<space/>

<wd l="4171" t="14486" r="4406" b="14630">for</wd>

<space/>

<wd l="4517" t="14486" r="5050" b="14630">distant</wd>

<space/>

<wd l="5165" t="14486" r="5798" b="14669">supervi-</wd>

</ln>

<ln l="1853" t="14726" r="4248" b="14914" baseLine="14861" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1853" t="14726" r="2174" b="14870">sion</wd>

<space/>

<wd l="2232" t="14726" r="3197" b="14914">(Augenstein</wd>

<space/>

<wd l="3250" t="14746" r="3394" b="14870">et</wd>

<space/>

<wd l="3446" t="14726" r="3677" b="14894">al.,</wd>

<space/>

<wd l="3739" t="14726" r="4248" b="14904">2014);</wd>

</ln>

</para>

<para l="1651" t="15163" r="5803" b="15355" alignment="justified" li="360" spaceBefore="201" spaceAfter="53" fli="-216" lsp="exactly" lspExact="240" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="1651" t="15168" r="5803" b="15355" baseLine="15302" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1651" t="15173" r="1843" b="15302">•</wd>

<tab position="1723"/>

<wd l="1843" t="15173" r="2458" b="15312">ANNIE</wd>

<space/>

<wd l="2506" t="15168" r="2813" b="15312">first</wd>

<space/>

<wd l="2851" t="15211" r="3278" b="15312">name</wd>

<space/>

<wd l="3322" t="15168" r="3638" b="15312">lists</wd>

<space/>

<wd l="3691" t="15168" r="4781" b="15355">(Cunningham</wd>

<space/>

<wd l="4824" t="15187" r="4963" b="15312">et</wd>

<space/>

<wd l="5006" t="15168" r="5242" b="15336">al.,</wd>

<space/>

<wd l="5294" t="15168" r="5803" b="15346">2002);</wd>

</ln>

</para>

</column>

<column l="6144" t="1320" r="10800" b="15408">

<rulerline l="6144" t="1330" r="10800" b="1330" type="single" width="14" color="000000"/>

<para l="6346" t="1421" r="8386" b="1565" alignment="left" li="144" spaceBefore="68" spaceAfter="12" lsp="exactly" lspExact="184" language="en">

<tabs position="6346"/>

<ln l="6346" t="1421" r="8386" b="1565" baseLine="1531" bold="true" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6346" t="1421" r="6566" b="1536">NE</wd>

<space/>

<wd l="6610" t="1430" r="6902" b="1565">type</wd>

<tab position="6902"/>

<wd l="7445" t="1421" r="8054" b="1536">Freebase</wd>

<space/>

<wd l="8093" t="1430" r="8386" b="1565">type</wd>

</ln>

</para>

<rulerline l="6144" t="1598" r="10800" b="1598" type="single" width="10" color="000000"/>

<para l="6346" t="1632" r="9302" b="1781" alignment="left" li="144" lsp="exactly" lspExact="197" language="en">

<tabs position="6346"/>

<ln l="6346" t="1632" r="9302" b="1781" baseLine="1738" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6346" t="1666" r="6926" b="1781">company</wd>

<tab position="6926"/>

<wd l="7440" t="1632" r="8602" b="1747">/business/business</wd>

<space/>

<wd l="8664" t="1632" r="9302" b="1781">operation,</wd>

</ln>

</para>

<para l="7440" t="1829" r="9125" b="1978" alignment="left" li="1296" lsp="exactly" lspExact="198" language="en">

<ln l="7440" t="1829" r="9125" b="1978" baseLine="1934" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="7440" t="1829" r="9125" b="1978">/organization/organization</wd>

</ln>

</para>

<para l="6346" t="2030" r="10325" b="2342" alignment="left" li="1296" ri="432" spaceBefore="1" fli="-1152" lsp="exactly" lspExact="199" language="en">

<tabs position="6346"/>

<ln l="6346" t="2030" r="10325" b="2179" baseLine="2136" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="6346" t="2030" r="6797" b="2179">facility</wd>

<tab position="6797"/>

<wd l="7440" t="2030" r="10325" b="2179">/architecture/building,/architecture/structure,
</wd>

</ln>

<ln l="7440" t="2227" r="8962" b="2342" baseLine="2333" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0" forcedEOF="true">

<wd l="7440" t="2227" r="8304" b="2342">/travel/tourist</wd>

<space/>

<wd l="8362" t="2227" r="8962" b="2342">attraction</wd>

</ln>

</para>

<para l="6346" t="2429" r="8563" b="2578" alignment="left" li="144" spaceBefore="2" lsp="exactly" lspExact="199" language="en">

<tabs position="6346"/>

<ln l="6346" t="2429" r="8563" b="2578" baseLine="2534" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6346" t="2429" r="6821" b="2578">geo-loc</wd>

<tab position="6821"/>

<wd l="7440" t="2429" r="8563" b="2544">/location/location</wd>

</ln>

</para>

<para l="6346" t="2626" r="8050" b="2741" alignment="left" li="144" spaceBefore="3" lsp="exactly" lspExact="198" language="en">

<tabs position="6346"/>

<ln l="6346" t="2626" r="8050" b="2741" baseLine="2736" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="6346" t="2626" r="6744" b="2741">movie</wd>

<tab position="6744"/>

<wd l="7440" t="2626" r="8050" b="2741">/film/film</wd>

</ln>

</para>

<para l="6346" t="2827" r="8194" b="2942" alignment="left" li="144" lsp="exactly" lspExact="198" language="en">

<tabs position="6346"/>

<ln l="6346" t="2827" r="8194" b="2942" baseLine="2933" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6346" t="2827" r="7051" b="2942">musicartist</wd>

<tab position="7051"/>

<wd l="7445" t="2827" r="8194" b="2942">music/artist</wd>

</ln>

</para>

<para l="6346" t="3024" r="9730" b="3173" alignment="left" li="144" spaceBefore="2" lsp="exactly" lspExact="198" language="en">

<tabs position="6346"/>

<ln l="6346" t="3024" r="9730" b="3173" baseLine="3134" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6346" t="3024" r="6682" b="3139">other</wd>

<tab position="6682"/>

<wd l="7440" t="3024" r="8808" b="3173">/education/university,</wd>

<space/>

<wd l="8851" t="3024" r="9730" b="3173">/time/holiday,</wd>

</ln>

</para>

<para l="7440" t="3226" r="8803" b="3374" alignment="left" li="1296" lsp="exactly" lspExact="198" language="en">

<ln l="7440" t="3226" r="8803" b="3374" baseLine="3331" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="7440" t="3226" r="8400" b="3374">/time/recurring</wd>

<space/>

<wd l="8458" t="3240" r="8803" b="3341">event</wd>

</ln>

</para>

<para l="6346" t="3422" r="8386" b="3571" alignment="left" li="144" spaceBefore="3" lsp="exactly" lspExact="198" language="en">

<tabs position="6346"/>

<ln l="6346" t="3422" r="8386" b="3571" baseLine="3533" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6346" t="3456" r="6773" b="3571">person</wd>

<tab position="6773"/>

<wd l="7440" t="3422" r="8386" b="3571">/people/person</wd>

</ln>

</para>

<para l="6346" t="3624" r="10339" b="3773" alignment="left" li="144" lsp="exactly" lspExact="198" language="en">

<tabs position="6346"/>

<ln l="6346" t="3624" r="10339" b="3773" baseLine="3730" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6346" t="3624" r="6840" b="3773">product</wd>

<tab position="6840"/>

<wd l="7440" t="3624" r="8698" b="3739">/business/consumer</wd>

<space/>

<wd l="8750" t="3624" r="9269" b="3773">product,</wd>

<space/>

<wd l="9312" t="3624" r="10339" b="3758">/business/brand,</wd>

</ln>

</para>

<para l="7440" t="3821" r="10589" b="4368" alignment="left" li="1296" ri="216" spaceBefore="3" lsp="exactly" lspExact="199" language="en">

<ln l="7440" t="3821" r="10589" b="3970" baseLine="3931" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="7440" t="3821" r="8717" b="3970">/computer/software,</wd>

<space/>

<wd l="8760" t="3821" r="10051" b="3970">/computer/operating</wd>

<space/>

<wd l="10118" t="3835" r="10589" b="3970">system,</wd>

<space/>

</ln>

<ln l="7440" t="4022" r="9672" b="4171" baseLine="4128" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="7440" t="4022" r="9000" b="4171">/computer/programming</wd>

<space/>

<wd l="9062" t="4022" r="9672" b="4171">language,</wd>

<space/>

</ln>

<ln l="7440" t="4219" r="9067" b="4368" baseLine="4330" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="7440" t="4219" r="8515" b="4368">/digicams/digital</wd>

<space/>

<wd l="8573" t="4253" r="9067" b="4354">camera,</wd>

</ln>

</para>

<para l="7440" t="4421" r="10555" b="5150" alignment="left" li="1296" ri="216" lsp="exactly" lspExact="198" language="en">

<ln l="7440" t="4421" r="10392" b="4570" baseLine="4526" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="7440" t="4421" r="8371" b="4570">/cvg/computer</wd>

<space/>

<wd l="8424" t="4421" r="9158" b="4570">videogame,</wd>

<space/>

<wd l="9197" t="4421" r="9749" b="4570">/cvg/cvg</wd>

<space/>

<wd l="9806" t="4421" r="10392" b="4570">platform,</wd>

<space/>

</ln>

<ln l="7440" t="4618" r="10555" b="4766" baseLine="4728" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="7440" t="4618" r="8150" b="4752">/food/food,</wd>

<space/>

<wd l="8194" t="4618" r="9182" b="4766">/food/beverage,</wd>

<space/>

<wd l="9221" t="4618" r="9830" b="4752">/food/tea,</wd>

<space/>

<wd l="9869" t="4618" r="10555" b="4752">/food/beer,</wd>

<space/>

</ln>

<ln l="7440" t="4819" r="10411" b="4968" baseLine="4925" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="7440" t="4819" r="8342" b="4968">/food/brewery</wd>

<space/>

<wd l="8400" t="4819" r="8765" b="4934">brand</wd>

<space/>

<wd l="8822" t="4819" r="8966" b="4934">of</wd>

<space/>

<wd l="9014" t="4819" r="9312" b="4954">beer,</wd>

<space/>

<wd l="9355" t="4819" r="10118" b="4968">/food/candy</wd>

<space/>

<wd l="10181" t="4819" r="10411" b="4954">bar,</wd>

<space/>

</ln>

<ln l="7440" t="5016" r="9768" b="5150" baseLine="5126" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="7440" t="5016" r="9768" b="5150">/food/cheese,/food/dish,/wine/wine</wd>

</ln>

</para>

<para l="7440" t="5218" r="9398" b="5366" alignment="left" li="1296" lsp="exactly" lspExact="198" language="en">

<ln l="7440" t="5218" r="9398" b="5366" baseLine="5323" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="7440" t="5218" r="8006" b="5333">/distilled</wd>

<space/>

<wd l="8064" t="5218" r="9010" b="5366">spirits/distilled</wd>

<space/>

<wd l="9072" t="5218" r="9398" b="5366">spirit</wd>

</ln>

</para>

<para l="6350" t="5414" r="8664" b="5563" alignment="left" li="144" spaceBefore="3" lsp="exactly" lspExact="198" language="en">

<tabs position="6350"/>

<ln l="6350" t="5414" r="8664" b="5563" baseLine="5525" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6350" t="5429" r="7037" b="5563">sportsteam</wd>

<tab position="7037"/>

<wd l="7440" t="5414" r="8290" b="5563">/sports/sports</wd>

<space/>

<wd l="8352" t="5429" r="8664" b="5530">team</wd>

</ln>

</para>

<para l="6346" t="5616" r="8381" b="5765" alignment="left" li="144" spaceAfter="50" lsp="exactly" lspExact="198" language="en">

<tabs position="6346"/>

<ln l="6346" t="5616" r="8381" b="5765" baseLine="5722" underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6346" t="5616" r="6806" b="5731">tvshow</wd>

<tab position="6806"/>

<wd l="7440" t="5616" r="7781" b="5731">/tv/tv</wd>

<space/>

<wd l="7838" t="5650" r="8381" b="5765">program</wd>

</ln>

</para>

<rulerline l="6144" t="5827" r="10800" b="5827" type="single" width="14" color="000000"/>

<para l="6149" t="6149" r="10507" b="6576" alignment="justified" ri="288" spaceBefore="282" lsp="exactly" lspExact="240" language="en">

<ln l="6149" t="6149" r="10507" b="6336" baseLine="6288" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="6149" r="6586" b="6293">Table</wd>

<space/>

<wd l="6677" t="6149" r="6797" b="6293">1:</wd>

<space/>

<wd l="6917" t="6154" r="7181" b="6293">NE</wd>

<space/>

<wd l="7253" t="6168" r="7666" b="6336">types</wd>

<space/>

<wd l="7747" t="6149" r="8035" b="6293">and</wd>

<space/>

<wd l="8107" t="6149" r="9240" b="6336">corresponding</wd>

<space/>

<wd l="9312" t="6149" r="10018" b="6293">Freebase</wd>

<space/>

<wd l="10094" t="6168" r="10507" b="6336">types</wd>

<space/>

</ln>

<ln l="6149" t="6389" r="8328" b="6576" baseLine="6528" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="6389" r="6514" b="6533">used</wd>

<space/>

<wd l="6562" t="6389" r="6797" b="6533">for</wd>

<space/>

<wd l="6850" t="6389" r="7483" b="6576">creating</wd>

<space/>

<wd l="7541" t="6408" r="8328" b="6576">gazetteers</wd>

</ln>

</para>

<para l="6355" t="7051" r="10517" b="7474" alignment="justified" li="360" ri="288" spaceBefore="422" fli="-216" lsp="exactly" lspExact="240" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="6355" t="7056" r="10517" b="7243" baseLine="7190" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="7061" r="6547" b="7190">•</wd>

<tab position="6427"/>

<wd l="6547" t="7056" r="6917" b="7200">First</wd>

<space/>

<wd l="7099" t="7099" r="7526" b="7200">name</wd>

<space/>

<wd l="7718" t="7056" r="8251" b="7243">trigger</wd>

<space/>

<wd l="8434" t="7075" r="8875" b="7200">terms</wd>

<space/>

<wd l="9077" t="7056" r="10037" b="7243">(Derczynski</wd>

<space/>

<wd l="10229" t="7056" r="10517" b="7200">and</wd>

<space/>

</ln>

<ln l="6547" t="7296" r="8002" b="7474" baseLine="7430" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6547" t="7296" r="7430" b="7464">Bontcheva,</wd>

<space/>

<wd l="7493" t="7296" r="8002" b="7474">2014);</wd>

</ln>

</para>

<para l="6355" t="7738" r="10517" b="8405" alignment="justified" li="360" ri="288" spaceBefore="210" fli="-216" lsp="exactly" lspExact="240" language="en">

<bullet type="bulleted" value="smallCircle" numChars="2">

</bullet>

<ln l="6355" t="7742" r="10517" b="7925" baseLine="7877" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="7747" r="6547" b="7877">•</wd>

<tab position="6428"/>

<wd l="6547" t="7742" r="6931" b="7886">Lists</wd>

<space/>

<wd l="7008" t="7742" r="7181" b="7886">of</wd>

<space/>

<wd l="7234" t="7742" r="7766" b="7886">named</wd>

<space/>

<wd l="7834" t="7742" r="8539" b="7925">temporal</wd>

<space/>

<wd l="8616" t="7742" r="9533" b="7925">expressions</wd>

<space/>

<wd l="9614" t="7742" r="10301" b="7920">(Brucato</wd>

<space/>

<wd l="10373" t="7762" r="10517" b="7886">et</wd>

<space/>

</ln>

<ln l="6552" t="7978" r="10512" b="8160" baseLine="8117" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6552" t="7978" r="6782" b="8146">al.,</wd>

<space/>

<wd l="6869" t="7978" r="7373" b="8155">2013),</wd>

<space/>

<wd l="7454" t="7978" r="7819" b="8122">used</wd>

<space/>

<wd l="7891" t="7978" r="8174" b="8122">due</wd>

<space/>

<wd l="8242" t="7997" r="8395" b="8122">to</wd>

<space/>

<wd l="8467" t="7978" r="8707" b="8122">the</wd>

<space/>

<wd l="8779" t="7978" r="9629" b="8160">prevalence</wd>

<space/>

<wd l="9706" t="7978" r="9878" b="8122">of</wd>

<space/>

<wd l="9936" t="7978" r="10512" b="8122">festival</wd>

<space/>

</ln>

<ln l="6552" t="8218" r="9595" b="8405" baseLine="8357">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6552" t="8218" r="6835" b="8362">and</wd>

<space/>

<wd l="6888" t="8237" r="7310" b="8362">event</wd>

<space/>

<wd l="7358" t="8261" r="7862" b="8362">names</wd>

<space/>

<wd l="7915" t="8218" r="8074" b="8357">in</wd>

<space/>

<wd l="8122" t="8218" r="8362" b="8362">the</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="8419" t="8218" r="8846" b="8362">other</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="8890" t="8237" r="9595" b="8405">category.</wd>

</run>

</ln>

</para>

<para l="6149" t="8678" r="10517" b="12211" alignment="justified" ri="288" spaceBefore="203" fli="144" lsp="exactly" lspExact="240" language="en">

<ln l="6346" t="8678" r="10507" b="8866" baseLine="8813" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6346" t="8678" r="7056" b="8822">Freebase</wd>

<space/>

<wd l="7138" t="8678" r="7968" b="8856">(Bollacker</wd>

<space/>

<wd l="8045" t="8698" r="8189" b="8822">et</wd>

<space/>

<wd l="8261" t="8678" r="8496" b="8846">al.,</wd>

<space/>

<wd l="8587" t="8678" r="9038" b="8856">2008)</wd>

<space/>

<wd l="9120" t="8678" r="9245" b="8822">is</wd>

<space/>

<wd l="9331" t="8722" r="9418" b="8822">a</wd>

<space/>

<wd l="9485" t="8678" r="9878" b="8866">large</wd>

<space/>

<wd l="9955" t="8678" r="10507" b="8822">knowl-</wd>

</ln>

<ln l="6154" t="8890" r="10507" b="9106" baseLine="9051">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="6154" t="8918" r="6523" b="9106">edge</wd>

<space/>

<wd l="6610" t="8918" r="6960" b="9062">base</wd>

<space/>

<wd l="7051" t="8918" r="7853" b="9106">consisting</wd>

<space/>

<wd l="7949" t="8918" r="8117" b="9062">of</wd>

<space/>

<wd l="8198" t="8918" r="8746" b="9062">around</wd>

<space/>

<wd l="8837" t="8918" r="8918" b="9062">3</wd>

<space/>

<wd l="9014" t="8918" r="9538" b="9062">billion</wd>

<space/>

</run>

<wd l="9619" t="8890" r="10114" b="9062"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">facts</run>

<run underlined="none" subsuperscript="superscript" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">1</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><space/>

<wd l="10291" t="8923" r="10507" b="9062">As</wd>

<space/>

</run>

</ln>

<ln l="6158" t="9158" r="10517" b="9346" baseLine="9293" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6158" t="9158" r="6557" b="9326">such,</wd>

<space/>

<wd l="6696" t="9158" r="6811" b="9302">it</wd>

<space/>

<wd l="6922" t="9158" r="7181" b="9302">has</wd>

<space/>

<wd l="7306" t="9158" r="7680" b="9302">been</wd>

<space/>

<wd l="7795" t="9158" r="8165" b="9302">used</wd>

<space/>

<wd l="8285" t="9158" r="9173" b="9346">extensively</wd>

<space/>

<wd l="9298" t="9202" r="9451" b="9302">as</wd>

<space/>

<wd l="9571" t="9158" r="10517" b="9346">background</wd>

<space/>

</ln>

<ln l="6149" t="9394" r="10517" b="9581" baseLine="9533" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="9394" r="7018" b="9581">knowledge</wd>

<space/>

<wd l="7094" t="9394" r="7325" b="9538">for</wd>

<space/>

<wd l="7402" t="9398" r="7776" b="9538">NLP</wd>

<space/>

<wd l="7853" t="9394" r="8246" b="9538">tasks</wd>

<space/>

<wd l="8338" t="9394" r="8693" b="9538">such</wd>

<space/>

<wd l="8774" t="9437" r="8928" b="9538">as</wd>

<space/>

<wd l="9014" t="9394" r="9461" b="9581">entity</wd>

<space/>

<wd l="9547" t="9394" r="9830" b="9538">and</wd>

<space/>

<wd l="9907" t="9394" r="10517" b="9538">relation</wd>

<space/>

</ln>

<ln l="6154" t="9634" r="10517" b="9821" baseLine="9773" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6154" t="9634" r="6946" b="9778">extraction</wd>

<space/>

<wd l="7042" t="9634" r="8006" b="9821">(Augenstein</wd>

<space/>

<wd l="8098" t="9653" r="8237" b="9778">et</wd>

<space/>

<wd l="8328" t="9634" r="8558" b="9802">al.,</wd>

<space/>

<wd l="8669" t="9634" r="9168" b="9811">2014).</wd>

<space/>

<wd l="9360" t="9634" r="10190" b="9778">Gazetteers</wd>

<space/>

<wd l="10282" t="9634" r="10517" b="9778">for</wd>

<space/>

</ln>

<ln l="6149" t="9874" r="10507" b="10061" baseLine="10008" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="9874" r="6389" b="10018">the</wd>

<space/>

<wd l="6466" t="9874" r="6638" b="10018">10</wd>

<space/>

<wd l="6701" t="9874" r="7147" b="10061">entity</wd>

<space/>

<wd l="7205" t="9893" r="7618" b="10061">types</wd>

<space/>

<wd l="7675" t="9917" r="8064" b="10018">were</wd>

<space/>

<wd l="8117" t="9874" r="8818" b="10018">retrieved</wd>

<space/>

<wd l="8870" t="9874" r="9259" b="10018">from</wd>

<space/>

<wd l="9312" t="9874" r="10018" b="10018">Freebase</wd>

<space/>

<wd l="10080" t="9874" r="10507" b="10018">semi-</wd>

</ln>

<ln l="6154" t="10114" r="10507" b="10301" baseLine="10248" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6154" t="10114" r="7258" b="10301">automatically.</wd>

<space/>

<wd l="7382" t="10114" r="7829" b="10258">Some</wd>

<space/>

<wd l="7901" t="10114" r="8074" b="10258">of</wd>

<space/>

<wd l="8126" t="10114" r="8371" b="10258">the</wd>

<space/>

<wd l="8438" t="10133" r="8851" b="10301">types</wd>

<space/>

<wd l="8928" t="10114" r="9811" b="10296">correspond</wd>

<space/>

<wd l="9874" t="10133" r="10022" b="10258">to</wd>

<space/>

<wd l="10094" t="10118" r="10507" b="10258">Free-</wd>

</ln>

<ln l="6149" t="10354" r="10507" b="10541" baseLine="10488" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="10354" r="6499" b="10498">base</wd>

<space/>

<wd l="6576" t="10373" r="6994" b="10541">types</wd>

<space/>

<wd l="7080" t="10354" r="7714" b="10541">directly,</wd>

<space/>

<wd l="7805" t="10397" r="8078" b="10541">e.g.</wd>

<space/>

<wd l="8227" t="10397" r="8760" b="10536">person</wd>

<space/>

<wd l="8842" t="10354" r="9792" b="10536">corresponds</wd>

<space/>

<wd l="9874" t="10373" r="10027" b="10498">to</wd>

<space/>

<wd l="10099" t="10354" r="10507" b="10536">/peo-</wd>

</ln>

<ln l="6149" t="10594" r="10512" b="10781" baseLine="10728">

<wd l="6149" t="10594" r="7032" b="10776"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">ple/</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">person</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><space/>

<wd l="7080" t="10594" r="7334" b="10738">but</wd>

<space/>

<wd l="7373" t="10594" r="7608" b="10738">for</wd>

<space/>

<wd l="7651" t="10594" r="8059" b="10738">other</wd>

<space/>

<wd l="8098" t="10613" r="8515" b="10781">types</wd>

<space/>

<wd l="8568" t="10594" r="8928" b="10738">such</wd>

<space/>

<wd l="8971" t="10637" r="9130" b="10738">as</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="9154" t="10594" r="9792" b="10776">product</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="9830" t="10594" r="10229" b="10738">there</wd>

<space/>

<wd l="10277" t="10637" r="10512" b="10738">are</wd>

<space/>

</run>

</ln>

<ln l="6149" t="10829" r="10507" b="11016" baseLine="10968" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="10872" r="6346" b="10973">no</wd>

<space/>

<wd l="6422" t="10829" r="7027" b="11016">directly</wd>

<space/>

<wd l="7104" t="10829" r="8237" b="11016">corresponding</wd>

<space/>

<wd l="8314" t="10848" r="8774" b="11016">types.</wd>

<space/>

<wd l="8914" t="10834" r="9115" b="10973">To</wd>

<space/>

<wd l="9192" t="10829" r="9595" b="10973">build</wd>

<space/>

<wd l="9672" t="10848" r="10507" b="11016">gazetteers,</wd>

<space/>

</ln>

<ln l="6149" t="11069" r="10507" b="11256" baseLine="11208" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="11112" r="6379" b="11213">we</wd>

<space/>

<wd l="6427" t="11069" r="7147" b="11213">therefore</wd>

<space/>

<wd l="7195" t="11069" r="7896" b="11213">retrieved</wd>

<space/>

<wd l="7949" t="11069" r="8141" b="11213">all</wd>

<space/>

<wd l="8189" t="11069" r="8894" b="11213">Freebase</wd>

<space/>

<wd l="8942" t="11088" r="9360" b="11256">types</wd>

<space/>

<wd l="9413" t="11069" r="9643" b="11213">for</wd>

<space/>

<wd l="9696" t="11069" r="9888" b="11213">all</wd>

<space/>

<wd l="9941" t="11069" r="10507" b="11213">entities</wd>

<space/>

</ln>

<ln l="6149" t="11309" r="10517" b="11496" baseLine="11443" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="11309" r="6302" b="11448">in</wd>

<space/>

<wd l="6360" t="11309" r="6600" b="11453">the</wd>

<space/>

<wd l="6658" t="11309" r="7272" b="11496">training</wd>

<space/>

<wd l="7334" t="11352" r="7858" b="11491">corpus</wd>

<space/>

<wd l="7920" t="11309" r="8203" b="11453">and</wd>

<space/>

<wd l="8266" t="11309" r="8899" b="11453">selected</wd>

<space/>

<wd l="8957" t="11309" r="9197" b="11453">the</wd>

<space/>

<wd l="9254" t="11328" r="9643" b="11453">most</wd>

<space/>

<wd l="9696" t="11309" r="10517" b="11491">prominent</wd>

<space/>

</ln>

<ln l="6149" t="11549" r="10512" b="11736" baseLine="11683" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="11549" r="6854" b="11693">Freebase</wd>

<space/>

<wd l="6898" t="11568" r="7315" b="11736">types</wd>

<space/>

<wd l="7363" t="11592" r="7618" b="11731">per</wd>

<space/>

<wd l="7666" t="11549" r="8107" b="11736">entity</wd>

<space/>

<wd l="8155" t="11568" r="8496" b="11736">type</wd>

<space/>

<wd l="8539" t="11549" r="8698" b="11688">in</wd>

<space/>

<wd l="8736" t="11549" r="8981" b="11693">the</wd>

<space/>

<wd l="9029" t="11549" r="9379" b="11736">gold</wd>

<space/>

<wd l="9427" t="11549" r="10133" b="11693">standard.</wd>

<space/>

<wd l="10205" t="11549" r="10512" b="11693">The</wd>

<space/>

</ln>

<ln l="6149" t="11789" r="10512" b="11976" baseLine="11923" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="11789" r="6394" b="11933">list</wd>

<space/>

<wd l="6442" t="11789" r="6614" b="11933">of</wd>

<space/>

<wd l="6653" t="11789" r="7358" b="11933">Freebase</wd>

<space/>

<wd l="7406" t="11808" r="7819" b="11976">types</wd>

<space/>

<wd l="7877" t="11789" r="9010" b="11976">corresponding</wd>

<space/>

<wd l="9058" t="11808" r="9211" b="11933">to</wd>

<space/>

<wd l="9264" t="11789" r="9624" b="11933">each</wd>

<space/>

<wd l="9677" t="11789" r="10123" b="11976">entity</wd>

<space/>

<wd l="10171" t="11808" r="10512" b="11976">type</wd>

<space/>

</ln>

<ln l="6149" t="12024" r="9269" b="12211" baseLine="12163" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="12024" r="6302" b="12163">in</wd>

<space/>

<wd l="6355" t="12024" r="6595" b="12168">the</wd>

<space/>

<wd l="6653" t="12024" r="7003" b="12211">gold</wd>

<space/>

<wd l="7061" t="12024" r="7728" b="12168">standard</wd>

<space/>

<wd l="7776" t="12024" r="7901" b="12168">is</wd>

<space/>

<wd l="7958" t="12024" r="8390" b="12168">listed</wd>

<space/>

<wd l="8438" t="12024" r="8597" b="12163">in</wd>

<space/>

<wd l="8645" t="12024" r="9082" b="12168">Table</wd>

<space/>

<wd l="9154" t="12024" r="9269" b="12168">1.</wd>

</ln>

</para>

<para l="6154" t="12278" r="10507" b="12941" alignment="justified" ri="288" spaceBefore="10" fli="144" lsp="exactly" lspExact="240" language="en">

<ln l="6346" t="12278" r="10507" b="12466" baseLine="12413" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6346" t="12283" r="6624" b="12422">For</wd>

<space/>

<wd l="6667" t="12278" r="7027" b="12422">each</wd>

<space/>

<wd l="7066" t="12278" r="7771" b="12422">Freebase</wd>

<space/>

<wd l="7814" t="12298" r="8198" b="12466">type,</wd>

<space/>

<wd l="8261" t="12298" r="8899" b="12461">separate</wd>

<space/>

<wd l="8947" t="12298" r="9730" b="12466">gazetteers</wd>

<space/>

<wd l="9778" t="12322" r="10162" b="12422">were</wd>

<space/>

<wd l="10210" t="12322" r="10507" b="12422">cre-</wd>

</ln>

<ln l="6154" t="12518" r="10507" b="12706" baseLine="12653" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6154" t="12518" r="6480" b="12662">ated</wd>

<space/>

<wd l="6552" t="12518" r="6782" b="12662">for</wd>

<space/>

<wd l="6859" t="12518" r="7306" b="12706">entity</wd>

<space/>

<wd l="7378" t="12562" r="7882" b="12662">names</wd>

<space/>

<wd l="7963" t="12518" r="8246" b="12662">and</wd>

<space/>

<wd l="8323" t="12518" r="9149" b="12662">alternative</wd>

<space/>

<wd l="9221" t="12562" r="9725" b="12662">names</wd>

<space/>

<wd l="9811" t="12518" r="10507" b="12696">(aliases),</wd>

<space/>

</ln>

<ln l="6158" t="12754" r="9490" b="12941" baseLine="12893" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6158" t="12754" r="6557" b="12898">since</wd>

<space/>

<wd l="6610" t="12754" r="6850" b="12898">the</wd>

<space/>

<wd l="6902" t="12754" r="7310" b="12898">latter</wd>

<space/>

<wd l="7358" t="12754" r="7704" b="12898">tend</wd>

<space/>

<wd l="7752" t="12773" r="7906" b="12898">to</wd>

<space/>

<wd l="7958" t="12754" r="8146" b="12898">be</wd>

<space/>

<wd l="8198" t="12754" r="8371" b="12898">of</wd>

<space/>

<wd l="8410" t="12754" r="8861" b="12898">lower</wd>

<space/>

<wd l="8914" t="12754" r="9490" b="12941">quality.</wd>

</ln>

</para>

<para l="6149" t="13008" r="10517" b="13910" alignment="justified" ri="288" spaceBefore="5" spaceAfter="157" fli="144" lsp="exactly" lspExact="240" language="en">

<ln l="6346" t="13008" r="10512" b="13195" baseLine="13142" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6346" t="13008" r="6811" b="13152">There</wd>

<space/>

<wd l="6883" t="13051" r="7267" b="13152">were</wd>

<space/>

<wd l="7354" t="13008" r="7896" b="13152">several</wd>

<space/>

<wd l="7978" t="13008" r="8381" b="13152">other</wd>

<space/>

<wd l="8458" t="13027" r="9173" b="13195">gazetteer</wd>

<space/>

<wd l="9250" t="13051" r="9835" b="13152">sources</wd>

<space/>

<wd l="9912" t="13008" r="10210" b="13152">that</wd>

<space/>

<wd l="10282" t="13051" r="10512" b="13152">we</wd>

<space/>

</ln>

<ln l="6149" t="13214" r="10502" b="13430" baseLine="13379">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="6149" t="13243" r="6514" b="13387">tried</wd>

<space/>

<wd l="6562" t="13243" r="6816" b="13387">but</wd>

<space/>

<wd l="6859" t="13243" r="7349" b="13387">which</wd>

<space/>

<wd l="7402" t="13243" r="7651" b="13387">did</wd>

<space/>

<wd l="7699" t="13262" r="7958" b="13387">not</wd>

<space/>

<wd l="8002" t="13243" r="8410" b="13387">work</wd>

<space/>

<wd l="8458" t="13286" r="8808" b="13430">very</wd>

<space/>

<wd l="8856" t="13243" r="9240" b="13387">well:</wd>

<space/>

<wd l="9317" t="13243" r="9802" b="13387">IMDb</wd>

<space/>

</run>

<wd l="9859" t="13214" r="10502" b="13426"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">dumps,</run>

<run underlined="none" subsuperscript="superscript" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">2</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><space/>

</run>

</ln>

<ln l="6149" t="13483" r="10507" b="13661" baseLine="13622" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="13483" r="6730" b="13627">Ritter’s</wd>

<space/>

<wd l="6835" t="13483" r="7877" b="13627">LabeledLDA</wd>

<space/>

<wd l="7978" t="13483" r="8294" b="13627">lists</wd>

<space/>

<wd l="8410" t="13483" r="8918" b="13661">(Ritter</wd>

<space/>

<wd l="9024" t="13502" r="9163" b="13627">et</wd>

<space/>

<wd l="9269" t="13483" r="9499" b="13651">al.,</wd>

<space/>

<wd l="9624" t="13483" r="10075" b="13661">2011)</wd>

<space/>

<wd l="10190" t="13483" r="10507" b="13661">(du-</wd>

</ln>

<ln l="6149" t="13723" r="10517" b="13910" baseLine="13858" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="6149" t="13723" r="6782" b="13906">plicated</wd>

<space/>

<wd l="6859" t="13723" r="7013" b="13862">in</wd>

<space/>

<wd l="7094" t="13723" r="7334" b="13867">the</wd>

<space/>

<wd l="7416" t="13723" r="8069" b="13867">baseline</wd>

<space/>

<wd l="8160" t="13723" r="8813" b="13910">system),</wd>

<space/>

<wd l="8909" t="13723" r="9192" b="13867">and</wd>

<space/>

<wd l="9274" t="13723" r="10018" b="13867">ANNIE’s</wd>

<space/>

<wd l="10109" t="13723" r="10517" b="13867">other</wd>

</ln>

</para>

<rulerline l="6144" t="14078" r="7349" b="14078" type="single" width="10" color="000000"/>

<para l="6149" t="14131" r="10526" b="15120" alignment="left" ri="216" spaceBefore="59" fli="144" lsp="exactly" lspExact="198" language="en">

<ln l="6413" t="14131" r="10526" b="14333" baseLine="14288">

<wd l="6413" t="14131" r="6749" b="14294"><run underlined="none" subsuperscript="superscript" fontSize="600" fontFace="Courier New" fontFamily="roman" fontPitch="variable" spacing="0">1</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Courier New" fontFamily="roman" fontPitch="variable" spacing="0">The</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Courier New" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="6878" t="14165" r="7512" b="14294">Freebase</wd>

<space/>

<wd l="7642" t="14165" r="8146" b="14333">project</wd>

<space/>

<wd l="8270" t="14165" r="8381" b="14294">is</wd>

<space/>

<wd l="8515" t="14165" r="8914" b="14333">being</wd>

<space/>

<wd l="9048" t="14165" r="9960" b="14294">discontinued</wd>

<space/>

<wd l="10090" t="14203" r="10229" b="14294">as</wd>

<space/>

<wd l="10368" t="14165" r="10526" b="14294">of</wd>

<space/>

</run>

</ln>

<ln l="6149" t="14352" r="10517" b="14534" baseLine="14486" underlined="none" subsuperscript="none" fontSize="900" fontFace="Courier New" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="14371" r="6475" b="14534">May</wd>

<space/>

<wd l="6595" t="14366" r="6984" b="14520">2015,</wd>

<space/>

<wd l="7118" t="14366" r="7757" b="14520">however,</wd>

<space/>

<wd l="7891" t="14366" r="8107" b="14496">the</wd>

<space/>

<wd l="8222" t="14366" r="8520" b="14496">data</wd>

<space/>

<wd l="8630" t="14366" r="8741" b="14496">is</wd>

<space/>

<wd l="8861" t="14366" r="9254" b="14534">being</wd>

<space/>

<wd l="9370" t="14366" r="10085" b="14534">integrated</wd>

<space/>

<wd l="10195" t="14366" r="10517" b="14496">with</wd>

<space/>

</ln>

<ln l="6149" t="14563" r="10474" b="14722" baseLine="14687">

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Courier New" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6149" t="14563" r="6802" b="14693">Wikidata</wd>

<space/>

<wd l="6970" t="14563" r="7757" b="14722">(Vrandečić</wd>

<space/>

<wd l="7930" t="14563" r="8189" b="14693">and</wd>

<space/>

<wd l="8357" t="14563" r="9038" b="14717">Krötzsch,</wd>

<space/>

<wd l="9245" t="14563" r="9691" b="14722">2014).</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Courier New" fontFamily="modern" fontPitch="fixed" spacing="0"><wd l="9874" t="14578" r="10474" b="14722">https:</wd>

<space/>

</run>

</ln>

<ln l="6168" t="14765" r="10435" b="14923" baseLine="14885" underlined="none" subsuperscript="none" fontSize="900" fontFace="Courier New" fontFamily="modern" fontPitch="fixed" spacing="0">

<wd l="6168" t="14770" r="10435" b="14923">//plus.google.com/109936836907132434202/</wd>

<space/>

</ln>

<ln l="6149" t="14966" r="7963" b="15120" baseLine="15086" underlined="none" subsuperscript="none" fontSize="900" fontFace="Courier New" fontFamily="modern" fontPitch="fixed" spacing="0">

<wd l="6149" t="14966" r="7963" b="15120">posts/3aYFVNf92A1</wd>

</ln>

</para>

<para l="6403" t="15149" r="9077" b="15350" alignment="left" li="144" spaceBefore="31" spaceAfter="58" lsp="exactly" lspExact="198" language="en">

<ln l="6403" t="15149" r="9077" b="15350" baseLine="15299">

<wd l="6403" t="15149" r="6730" b="15312"><run underlined="none" subsuperscript="superscript" fontSize="600" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">2</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">See</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="6773" t="15182" r="9077" b="15350">http://www.imdb.com/interfaces</wd>

</run>

</ln>

</para>

</column>

</section>

<dd l="1445" t="15736" r="10800" b="15977">

<para l="5800" t="15792" r="6143" b="15946" alignment="centered" spaceBefore="4" lsp="exactly" lspExact="229" language="en">

<ln l="5866" t="15792" r="6077" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="41">

<wd l="5866" t="15792" r="6077" b="15946">49</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4306.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1438" marginTop="1296" marginRight="1380" marginBottom="1302" offsetX="-26" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1438" t="1296" r="10529" b="15363">

<column l="1438" t="1296" r="5820" b="15363">

<table l="1762" t="1306" r="5460" b="3926" alignment="left" li="324" ri="360" spaceBefore="10" spaceAfter="176">

<topBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<gridTable>

<gridCol>1281</gridCol>

<gridCol>806</gridCol>

<gridCol>802</gridCol>

<gridCol>809</gridCol>

<gridRow>206</gridRow>

<gridRow>183</gridRow>

<gridRow>201</gridRow>

<gridRow>197</gridRow>

<gridRow>202</gridRow>

<gridRow>197</gridRow>

<gridRow>201</gridRow>

<gridRow>197</gridRow>

<gridRow>197</gridRow>

<gridRow>201</gridRow>

<gridRow>226</gridRow>

<gridRow>206</gridRow>

<gridRow>206</gridRow>

</gridTable>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="1997" t="1325" r="2846" b="1488" alignment="left" li="235" spaceAfter="16" lsp="exactly" lspExact="190" language="en">

<ln l="1997" t="1325" r="2846" b="1488" baseLine="1445" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1997" t="1325" r="2472" b="1488">Entity</wd>

<space/>

<wd l="2520" t="1334" r="2846" b="1488">type</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="3394" t="1325" r="3504" b="1450" alignment="centered" spaceAfter="16" lsp="exactly" lspExact="190" language="en">

<ln l="3394" t="1325" r="3504" b="1450" baseLine="1445" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="3394" t="1325" r="3504" b="1450">P</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="4190" t="1325" r="4315" b="1450" alignment="centered" spaceAfter="16" lsp="exactly" lspExact="190" language="en">

<ln l="4190" t="1325" r="4315" b="1450" baseLine="1445" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="4190" t="1325" r="4315" b="1450">R</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="0" gridRowTill="0" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="4954" t="1325" r="5146" b="1450" alignment="centered" spaceAfter="16" lsp="exactly" lspExact="190" language="en">

<ln l="4954" t="1325" r="5146" b="1450" baseLine="1445" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="4954" t="1325" r="5146" b="1450">F1</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<para l="1997" t="1536" r="2650" b="1695" alignment="left" li="235" lsp="exactly" lspExact="172" language="en">

<ln l="1997" t="1536" r="2650" b="1699" baseLine="1656" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1997" t="1570" r="2650" b="1699">company</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="1" gridRowTill="1" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<para l="3250" t="1531" r="3643" b="1661" alignment="centered" lsp="exactly" lspExact="172" language="en">

<ln l="3250" t="1531" r="3643" b="1661" baseLine="1656" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="3250" t="1531" r="3643" b="1661">28.07</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<para l="4051" t="1531" r="4445" b="1661" alignment="left" lsp="exactly" lspExact="172" language="en">

<tabs position="4051"/>

<ln l="4051" t="1531" r="4445" b="1661" baseLine="1656" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="4051" t="1531" r="4445" b="1661">41.03</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="1" gridRowTill="1" alignment="decimal" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<para l="4858" t="1531" r="5246" b="1661" alignment="left" lsp="exactly" lspExact="172" language="en">

<tabs position="4858"/>

<ln l="4858" t="1531" r="5246" b="1661" baseLine="1656" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="4858" t="1531" r="5246" b="1661">33.33</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<para l="1997" t="1733" r="2501" b="1896" alignment="left" li="235" lsp="exactly" lspExact="189" language="en">

<ln l="1997" t="1733" r="2501" b="1901" baseLine="1853" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1997" t="1733" r="2501" b="1901">facility</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="2" gridRowTill="2" alignment="left" verticalAlignment="middle">

<para l="3250" t="1733" r="3648" b="1862" alignment="centered" lsp="exactly" lspExact="189" language="en">

<ln l="3250" t="1733" r="3648" b="1862" baseLine="1853" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="3250" t="1733" r="3648" b="1862">25.00</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="middle">

<para l="4056" t="1733" r="4445" b="1862" alignment="left" lsp="exactly" lspExact="189" language="en">

<tabs position="4056"/>

<ln l="4056" t="1733" r="4445" b="1862" baseLine="1853" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="4056" t="1733" r="4445" b="1862">23.68</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="2" gridRowTill="2" alignment="decimal" verticalAlignment="middle">

<para l="4858" t="1733" r="5251" b="1862" alignment="left" lsp="exactly" lspExact="189" language="en">

<tabs position="4858"/>

<ln l="4858" t="1733" r="5251" b="1862" baseLine="1853" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="4858" t="1733" r="5251" b="1862">24.32</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<para l="2002" t="1930" r="2530" b="2093" alignment="left" li="235" lsp="exactly" lspExact="192" language="en">

<ln l="2002" t="1930" r="2530" b="2098" baseLine="2054" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="2002" t="1930" r="2530" b="2098">geo-loc</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="3" gridRowTill="3" alignment="left" verticalAlignment="middle">

<para l="3250" t="1930" r="3634" b="2059" alignment="centered" lsp="exactly" lspExact="192" language="en">

<ln l="3250" t="1930" r="3634" b="2059" baseLine="2054" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="3250" t="1930" r="3634" b="2059">53.91</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="middle">

<para l="4056" t="1930" r="4445" b="2059" alignment="left" lsp="exactly" lspExact="192" language="en">

<tabs position="4056"/>

<ln l="4056" t="1930" r="4445" b="2059" baseLine="2054" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="4056" t="1930" r="4445" b="2059">53.45</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="3" gridRowTill="3" alignment="decimal" verticalAlignment="middle">

<para l="4858" t="1930" r="5246" b="2059" alignment="left" lsp="exactly" lspExact="192" language="en">

<tabs position="4858"/>

<ln l="4858" t="1930" r="5246" b="2059" baseLine="2054" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="4858" t="1930" r="5246" b="2059">53.68</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="middle">

<para l="1997" t="2131" r="2438" b="2261" alignment="left" li="235" lsp="exactly" lspExact="193" language="en">

<ln l="1997" t="2131" r="2438" b="2261" baseLine="2251" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1997" t="2131" r="2438" b="2261">movie</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="4" gridRowTill="4" alignment="left" verticalAlignment="middle">

<para l="3250" t="2131" r="3648" b="2261" alignment="centered" lsp="exactly" lspExact="193" language="en">

<ln l="3250" t="2131" r="3648" b="2261" baseLine="2251" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="3250" t="2131" r="3648" b="2261">20.00</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="middle">

<para l="4099" t="2131" r="4402" b="2261" alignment="left" lsp="exactly" lspExact="193" language="en">

<tabs position="4099"/>

<ln l="4099" t="2131" r="4402" b="2261" baseLine="2251" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="4099" t="2131" r="4402" b="2261">6.67</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="4" gridRowTill="4" alignment="decimal" verticalAlignment="middle">

<para l="4872" t="2131" r="5251" b="2261" alignment="left" lsp="exactly" lspExact="193" language="en">

<tabs position="4872"/>

<ln l="4872" t="2131" r="5251" b="2261" baseLine="2251" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="4872" t="2131" r="5251" b="2261">10.00</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="5" gridRowTill="5" alignment="left" verticalAlignment="middle">

<para l="1997" t="2328" r="2789" b="2458" alignment="left" li="235" lsp="exactly" lspExact="196" language="en">

<ln l="1997" t="2328" r="2789" b="2458" baseLine="2453" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1997" t="2328" r="2789" b="2458">musicartist</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="5" gridRowTill="5" alignment="left" verticalAlignment="middle">

<para l="3264" t="2328" r="3648" b="2458" alignment="centered" lsp="exactly" lspExact="196" language="en">

<ln l="3264" t="2328" r="3648" b="2458" baseLine="2453" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="3264" t="2328" r="3648" b="2458">14.29</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="5" gridRowTill="5" alignment="decimal" verticalAlignment="middle">

<para l="4099" t="2328" r="4406" b="2458" alignment="left" lsp="exactly" lspExact="196" language="en">

<tabs position="4099"/>

<ln l="4099" t="2328" r="4406" b="2458" baseLine="2453" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="4099" t="2328" r="4406" b="2458">2.44</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="5" gridRowTill="5" alignment="decimal" verticalAlignment="middle">

<para l="4896" t="2328" r="5203" b="2458" alignment="left" lsp="exactly" lspExact="196" language="en">

<tabs position="4896"/>

<ln l="4896" t="2328" r="5203" b="2458" baseLine="2453" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="4896" t="2328" r="5203" b="2458">4.17</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="6" gridRowTill="6" alignment="left" verticalAlignment="middle">

<para l="2002" t="2530" r="2366" b="2659" alignment="left" li="235" lsp="exactly" lspExact="198" language="en">

<ln l="2002" t="2530" r="2366" b="2659" baseLine="2650" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="2002" t="2530" r="2366" b="2659">other</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="6" gridRowTill="6" alignment="left" verticalAlignment="middle">

<para l="3245" t="2530" r="3643" b="2659" alignment="centered" lsp="exactly" lspExact="198" language="en">

<ln l="3245" t="2530" r="3643" b="2659" baseLine="2650" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="3245" t="2530" r="3643" b="2659">45.78</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="6" gridRowTill="6" alignment="decimal" verticalAlignment="middle">

<para l="4056" t="2530" r="4450" b="2659" alignment="left" lsp="exactly" lspExact="198" language="en">

<tabs position="4056"/>

<ln l="4056" t="2530" r="4450" b="2659" baseLine="2650" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="4056" t="2530" r="4450" b="2659">28.79</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="6" gridRowTill="6" alignment="decimal" verticalAlignment="middle">

<para l="4858" t="2530" r="5246" b="2659" alignment="left" lsp="exactly" lspExact="198" language="en">

<tabs position="4858"/>

<ln l="4858" t="2530" r="5246" b="2659" baseLine="2650" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="4858" t="2530" r="5246" b="2659">35.35</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="7" gridRowTill="7" alignment="left" verticalAlignment="middle">

<para l="1997" t="2726" r="2477" b="2890" alignment="left" li="235" lsp="exactly" lspExact="187" language="en">

<ln l="1997" t="2726" r="2477" b="2894" baseLine="2851" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1997" t="2765" r="2477" b="2894">person</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="7" gridRowTill="7" alignment="left" verticalAlignment="middle">

<para l="3250" t="2726" r="3643" b="2856" alignment="centered" lsp="exactly" lspExact="187" language="en">

<ln l="3250" t="2726" r="3643" b="2856" baseLine="2851" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="3250" t="2726" r="3643" b="2856">54.63</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="7" gridRowTill="7" alignment="decimal" verticalAlignment="middle">

<para l="4056" t="2726" r="4450" b="2856" alignment="left" lsp="exactly" lspExact="187" language="en">

<tabs position="4056"/>

<ln l="4056" t="2726" r="4450" b="2856" baseLine="2851" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="4056" t="2726" r="4450" b="2856">65.50</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="7" gridRowTill="7" alignment="decimal" verticalAlignment="middle">

<para l="4858" t="2726" r="5246" b="2856" alignment="left" lsp="exactly" lspExact="187" language="en">

<tabs position="4858"/>

<ln l="4858" t="2726" r="5246" b="2856" baseLine="2851" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="4858" t="2726" r="5246" b="2856">59.57</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="8" gridRowTill="8" alignment="left" verticalAlignment="middle">

<para l="1997" t="2928" r="2549" b="3087" alignment="left" li="235" lsp="exactly" lspExact="191" language="en">

<ln l="1997" t="2928" r="2549" b="3096" baseLine="3048" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1997" t="2928" r="2549" b="3096">product</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="8" gridRowTill="8" alignment="left" verticalAlignment="middle">

<para l="3250" t="2928" r="3643" b="3058" alignment="centered" lsp="exactly" lspExact="191" language="en">

<ln l="3250" t="2928" r="3643" b="3058" baseLine="3048" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="3250" t="2928" r="3643" b="3058">27.78</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="8" gridRowTill="8" alignment="decimal" verticalAlignment="middle">

<para l="4070" t="2928" r="4435" b="3058" alignment="left" lsp="exactly" lspExact="191" language="en">

<tabs position="4070"/>

<ln l="4070" t="2928" r="4435" b="3058" baseLine="3048" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-6">

<wd l="4070" t="2928" r="4435" b="3058">13.51</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="8" gridRowTill="8" alignment="decimal" verticalAlignment="middle">

<para l="4872" t="2928" r="5246" b="3058" alignment="left" lsp="exactly" lspExact="191" language="en">

<tabs position="4872"/>

<ln l="4872" t="2928" r="5246" b="3058" baseLine="3048" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-5">

<wd l="4872" t="2928" r="5246" b="3058">18.18</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="9" gridRowTill="9" alignment="left" verticalAlignment="middle">

<para l="2002" t="3125" r="2774" b="3288" alignment="left" li="235" lsp="exactly" lspExact="194" language="en">

<ln l="2002" t="3125" r="2774" b="3293" baseLine="3250" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="2002" t="3144" r="2774" b="3293">sportsteam</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="9" gridRowTill="9" alignment="left" verticalAlignment="middle">

<para l="3245" t="3125" r="3648" b="3254" alignment="centered" lsp="exactly" lspExact="194" language="en">

<ln l="3245" t="3125" r="3648" b="3254" baseLine="3250" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="3245" t="3125" r="3648" b="3254">42.86</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="9" gridRowTill="9" alignment="decimal" verticalAlignment="middle">

<para l="4056" t="3125" r="4435" b="3254" alignment="left" lsp="exactly" lspExact="194" language="en">

<tabs position="4056"/>

<ln l="4056" t="3125" r="4435" b="3254" baseLine="3250" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="4056" t="3125" r="4435" b="3254">25.71</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="9" gridRowTill="9" alignment="decimal" verticalAlignment="middle">

<para l="4858" t="3125" r="5251" b="3254" alignment="left" lsp="exactly" lspExact="194" language="en">

<tabs position="4858"/>

<ln l="4858" t="3125" r="5251" b="3254" baseLine="3250" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="4858" t="3125" r="5251" b="3254">32.14</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="10" gridRowTill="10" alignment="left" verticalAlignment="middle">

<bottomBorder type="single" width="10"/>

<para l="1997" t="3326" r="2506" b="3456" alignment="left" li="235" spaceAfter="14" lsp="exactly" lspExact="199" language="en">

<ln l="1997" t="3326" r="2506" b="3456" baseLine="3446" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1997" t="3326" r="2506" b="3456">tvshow</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="10" gridRowTill="10" alignment="left" verticalAlignment="middle">

<bottomBorder type="single" width="10"/>

<para l="3293" t="3326" r="3600" b="3456" alignment="centered" spaceAfter="14" lsp="exactly" lspExact="199" language="en">

<ln l="3293" t="3326" r="3600" b="3456" baseLine="3446" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="3293" t="3326" r="3600" b="3456">0.00</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="10" gridRowTill="10" alignment="decimal" verticalAlignment="middle">

<bottomBorder type="single" width="10"/>

<para l="4094" t="3326" r="4406" b="3456" alignment="left" spaceAfter="14" lsp="exactly" lspExact="199" language="en">

<tabs position="4094"/>

<ln l="4094" t="3326" r="4406" b="3456" baseLine="3446" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="4094" t="3326" r="4406" b="3456">0.00</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="10" gridRowTill="10" alignment="decimal" verticalAlignment="middle">

<bottomBorder type="single" width="10"/>

<para l="4896" t="3326" r="5208" b="3456" alignment="left" spaceAfter="14" lsp="exactly" lspExact="199" language="en">

<tabs position="4896"/>

<ln l="4896" t="3326" r="5208" b="3456" baseLine="3446" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="4896" t="3326" r="5208" b="3456">0.00</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="11" gridRowTill="11" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="2002" t="3533" r="2530" b="3662" alignment="left" li="235" spaceAfter="9" lsp="exactly" lspExact="192" language="en">

<ln l="2002" t="3533" r="2530" b="3662" baseLine="3653" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="2002" t="3533" r="2530" b="3662">Overall</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="11" gridRowTill="11" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="3245" t="3533" r="3648" b="3662" alignment="centered" spaceAfter="9" lsp="exactly" lspExact="192" language="en">

<ln l="3245" t="3533" r="3648" b="3662" baseLine="3653" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="3245" t="3533" r="3648" b="3662">45.72</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="11" gridRowTill="11" alignment="decimal" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="4056" t="3533" r="4450" b="3662" alignment="left" spaceAfter="9" lsp="exactly" lspExact="192" language="en">

<tabs position="4056"/>

<ln l="4056" t="3533" r="4450" b="3662" baseLine="3653" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="4056" t="3533" r="4450" b="3662">39.64</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="11" gridRowTill="11" alignment="decimal" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="4853" t="3533" r="5251" b="3662" alignment="left" spaceAfter="9" lsp="exactly" lspExact="192" language="en">

<tabs position="4853"/>

<ln l="4853" t="3533" r="5251" b="3662" baseLine="3653" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="4853" t="3533" r="5251" b="3662">42.46</wd>

</ln>

</para>

</cell>

<cell gridColFrom="0" gridColTill="0" gridRowFrom="12" gridRowTill="12" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="1997" t="3744" r="2630" b="3907" alignment="left" li="235" spaceAfter="4" lsp="exactly" lspExact="192" language="en">

<ln l="1997" t="3744" r="2630" b="3907" baseLine="3859" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1997" t="3744" r="2213" b="3869">No</wd>

<space/>

<wd l="2261" t="3763" r="2630" b="3907">types</wd>

</ln>

</para>

</cell>

<cell gridColFrom="1" gridColTill="1" gridRowFrom="12" gridRowTill="12" alignment="left" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="3250" t="3739" r="3634" b="3869" alignment="centered" spaceAfter="4" lsp="exactly" lspExact="192" language="en">

<ln l="3250" t="3739" r="3634" b="3869" baseLine="3859" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="3250" t="3739" r="3634" b="3869">63.81</wd>

</ln>

</para>

</cell>

<cell gridColFrom="2" gridColTill="2" gridRowFrom="12" gridRowTill="12" alignment="decimal" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="4056" t="3739" r="4445" b="3869" alignment="left" spaceAfter="4" lsp="exactly" lspExact="192" language="en">

<tabs position="4056"/>

<ln l="4056" t="3739" r="4445" b="3869" baseLine="3859" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="4056" t="3739" r="4445" b="3869">56.28</wd>

</ln>

</para>

</cell>

<cell gridColFrom="3" gridColTill="3" gridRowFrom="12" gridRowTill="12" alignment="decimal" verticalAlignment="middle">

<topBorder type="single" width="10"/>

<bottomBorder type="single" width="10"/>

<para l="4858" t="3739" r="5237" b="3869" alignment="left" spaceAfter="4" lsp="exactly" lspExact="192" language="en">

<tabs position="4858"/>

<ln l="4858" t="3739" r="5237" b="3869" baseLine="3859" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-4">

<wd l="4858" t="3739" r="5237" b="3869">59.81</wd>

</ln>

</para>

</cell>

</table>

<para l="1546" t="4147" r="5693" b="4334" alignment="left" lsp="exactly" lspExact="238" language="en">

<ln l="1546" t="4147" r="5693" b="4334" baseLine="4286" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1546" t="4147" r="1982" b="4291">Table</wd>

<space/>

<wd l="2035" t="4147" r="2170" b="4291">2:</wd>

<space/>

<wd l="2251" t="4147" r="2832" b="4291">Results</wd>

<space/>

<wd l="2890" t="4147" r="3062" b="4291">of</wd>

<space/>

<wd l="3101" t="4147" r="3341" b="4291">the</wd>

<space/>

<wd l="3394" t="4147" r="3898" b="4291">USFD</wd>

<space/>

<wd l="3955" t="4152" r="4603" b="4291">W-NUT</wd>

<space/>

<wd l="4661" t="4147" r="5045" b="4291">2015</wd>

<space/>

<wd l="5112" t="4166" r="5693" b="4334">system.</wd>

</ln>

</para>

<para l="1445" t="4786" r="5813" b="6643" alignment="justified" spaceBefore="389" lsp="exactly" lspExact="240" language="en">

<ln l="1450" t="4786" r="5798" b="4973" baseLine="4920" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="4805" r="2232" b="4973">gazetteers</wd>

<space/>

<wd l="2342" t="4786" r="2942" b="4973">(largely</wd>

<space/>

<wd l="3043" t="4786" r="3845" b="4973">consisting</wd>

<space/>

<wd l="3946" t="4786" r="4118" b="4930">of</wd>

<space/>

<wd l="4205" t="4786" r="5299" b="4973">organisations,</wd>

<space/>

<wd l="5410" t="4786" r="5798" b="4930">loca-</wd>

</ln>

<ln l="1445" t="5026" r="5798" b="5203" baseLine="5160" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="5026" r="1872" b="5194">tions,</wd>

<space/>

<wd l="1968" t="5026" r="2251" b="5170">and</wd>

<space/>

<wd l="2328" t="5026" r="2654" b="5170">date</wd>

<space/>

<wd l="2736" t="5026" r="3365" b="5203">entities)</wd>

<space/>

<wd l="3456" t="5069" r="3638" b="5170">en</wd>

<space/>

<wd l="3710" t="5069" r="4238" b="5170">masse.</wd>

<space/>

<wd l="4387" t="5026" r="4786" b="5170">Each</wd>

<space/>

<wd l="4867" t="5026" r="5035" b="5170">of</wd>

<space/>

<wd l="5102" t="5026" r="5510" b="5170">these</wd>

<space/>

<wd l="5587" t="5026" r="5798" b="5165">in-</wd>

</ln>

<ln l="1445" t="5266" r="5798" b="5448" baseLine="5400" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="5266" r="2141" b="5410">troduced</wd>

<space/>

<wd l="2203" t="5309" r="2290" b="5410">a</wd>

<space/>

<wd l="2357" t="5266" r="2712" b="5448">drop</wd>

<space/>

<wd l="2774" t="5266" r="2933" b="5405">in</wd>

<space/>

<wd l="2990" t="5266" r="3998" b="5448">performance</wd>

<space/>

<wd l="4066" t="5309" r="4224" b="5410">or</wd>

<space/>

<wd l="4291" t="5309" r="4469" b="5410">an</wd>

<space/>

<wd l="4536" t="5266" r="5198" b="5410">unstable</wd>

<space/>

<wd l="5261" t="5266" r="5798" b="5448">perfor-</wd>

</ln>

<ln l="1445" t="5501" r="5803" b="5688" baseLine="5640" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="5544" r="2006" b="5669">mance,</wd>

<space/>

<wd l="2059" t="5501" r="2717" b="5688">possibly</wd>

<space/>

<wd l="2770" t="5501" r="3048" b="5645">due</wd>

<space/>

<wd l="3096" t="5520" r="3245" b="5645">to</wd>

<space/>

<wd l="3293" t="5501" r="3533" b="5645">the</wd>

<space/>

<wd l="3581" t="5501" r="4334" b="5645">increased</wd>

<space/>

<wd l="4378" t="5501" r="5208" b="5688">ambiguity.</wd>

<space/>

<wd l="5280" t="5501" r="5626" b="5645">This</wd>

<space/>

<wd l="5674" t="5501" r="5803" b="5645">is</wd>

<space/>

</ln>

<ln l="1450" t="5741" r="5813" b="5928" baseLine="5880" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="5784" r="1536" b="5885">a</wd>

<space/>

<wd l="1579" t="5741" r="2117" b="5885">known</wd>

<space/>

<wd l="2165" t="5741" r="2837" b="5923">problem</wd>

<space/>

<wd l="2880" t="5741" r="3235" b="5885">with</wd>

<space/>

<wd l="3288" t="5741" r="4416" b="5885">discriminative</wd>

<space/>

<wd l="4464" t="5741" r="5117" b="5928">learning</wd>

<space/>

<wd l="5170" t="5827" r="5270" b="5842">–</wd>

<space/>

<wd l="5323" t="5741" r="5669" b="5928">only</wd>

<space/>

<wd l="5726" t="5784" r="5813" b="5885">a</wd>

<space/>

</ln>

<ln l="1450" t="5981" r="5808" b="6168" baseLine="6115" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="5981" r="1987" b="6125">certain</wd>

<space/>

<wd l="2035" t="6000" r="2630" b="6125">amount</wd>

<space/>

<wd l="2678" t="5981" r="2851" b="6125">of</wd>

<space/>

<wd l="2894" t="6000" r="3677" b="6168">gazetteers</wd>

<space/>

<wd l="3730" t="6024" r="4070" b="6168">may</wd>

<space/>

<wd l="4118" t="5981" r="4306" b="6125">be</wd>

<space/>

<wd l="4354" t="5981" r="4718" b="6125">used</wd>

<space/>

<wd l="4771" t="6024" r="4925" b="6125">as</wd>

<space/>

<wd l="4978" t="5981" r="5602" b="6125">features</wd>

<space/>

<wd l="5654" t="5981" r="5808" b="6120">in</wd>

<space/>

</ln>

<ln l="1445" t="6221" r="5808" b="6408" baseLine="6355" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="6221" r="1723" b="6365">this</wd>

<space/>

<wd l="1771" t="6264" r="2098" b="6408">way</wd>

<space/>

<wd l="2146" t="6221" r="2650" b="6365">before</wd>

<space/>

<wd l="2693" t="6221" r="3701" b="6403">performance</wd>

<space/>

<wd l="3749" t="6221" r="3922" b="6365">of</wd>

<space/>

<wd l="3955" t="6264" r="4042" b="6365">a</wd>

<space/>

<wd l="4085" t="6221" r="5213" b="6365">discriminative</wd>

<space/>

<wd l="5256" t="6221" r="5808" b="6365">learner</wd>

<space/>

</ln>

<ln l="1450" t="6461" r="4142" b="6643" baseLine="6595" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="6461" r="1882" b="6643">drops</wd>

<space/>

<wd l="1944" t="6461" r="2477" b="6638">(Smith</wd>

<space/>

<wd l="2534" t="6461" r="2818" b="6605">and</wd>

<space/>

<wd l="2870" t="6461" r="3586" b="6629">Osborne,</wd>

<space/>

<wd l="3643" t="6461" r="4142" b="6638">2006).</wd>

</ln>

</para>

<para l="1445" t="6859" r="5054" b="7042" alignment="left" spaceBefore="166" lsp="exactly" lspExact="235" language="en">

<ln l="1445" t="6859" r="5054" b="7042" baseLine="6994" bold="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="3">

<wd l="1445" t="6859" r="1690" b="7003">3.4</wd>

<space/>

<wd l="1891" t="6859" r="2674" b="7042">Learning</wd>

<space/>

<wd l="2726" t="6859" r="3336" b="7003">Models</wd>

<space/>

<wd l="3398" t="6859" r="3715" b="7003">and</wd>

<space/>

<wd l="3768" t="6859" r="5054" b="7042">Representation</wd>

</ln>

</para>

<para l="1445" t="7171" r="5813" b="9029" alignment="justified" spaceBefore="65" lsp="exactly" lspExact="240" language="en">

<ln l="1445" t="7171" r="5798" b="7358" baseLine="7306" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="7176" r="1661" b="7315">As</wd>

<space/>

<wd l="1781" t="7171" r="2122" b="7315">BIO</wd>

<space/>

<wd l="2242" t="7176" r="2506" b="7315">NE</wd>

<space/>

<wd l="2630" t="7171" r="3365" b="7358">chunking</wd>

<space/>

<wd l="3485" t="7171" r="3610" b="7315">is</wd>

<space/>

<wd l="3734" t="7171" r="4282" b="7358">readily</wd>

<space/>

<wd l="4406" t="7171" r="4968" b="7315">framed</wd>

<space/>

<wd l="5093" t="7214" r="5246" b="7315">as</wd>

<space/>

<wd l="5376" t="7214" r="5462" b="7315">a</wd>

<space/>

<wd l="5587" t="7214" r="5798" b="7315">se-</wd>

</ln>

<ln l="1450" t="7406" r="5798" b="7594" baseLine="7546" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1450" t="7450" r="2006" b="7589">quence</wd>

<space/>

<wd l="2059" t="7406" r="2698" b="7594">labeling</wd>

<space/>

<wd l="2755" t="7406" r="3466" b="7589">problem,</wd>

<space/>

<wd l="3523" t="7450" r="3754" b="7550">we</wd>

<space/>

<wd l="3816" t="7406" r="4896" b="7589">experimented</wd>

<space/>

<wd l="4949" t="7406" r="5304" b="7550">with</wd>

<space/>

<wd l="5366" t="7426" r="5798" b="7550">struc-</wd>

</ln>

<ln l="1445" t="7646" r="5803" b="7834" baseLine="7786" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="7646" r="1853" b="7790">tured</wd>

<space/>

<wd l="1934" t="7646" r="2626" b="7834">learning.</wd>

<space/>

<wd l="2794" t="7646" r="3091" b="7790">Out</wd>

<space/>

<wd l="3173" t="7646" r="3346" b="7790">of</wd>

<space/>

<wd l="3418" t="7646" r="3792" b="7790">CRF</wd>

<space/>

<wd l="3869" t="7646" r="4301" b="7834">using</wd>

<space/>

<wd l="4382" t="7646" r="5059" b="7790">L-BFGS</wd>

<space/>

<wd l="5150" t="7646" r="5803" b="7829">updates,</wd>

<space/>

</ln>

<ln l="1450" t="7886" r="5798" b="8074" baseLine="8021" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1450" t="7886" r="1819" b="8030">CRF</wd>

<space/>

<wd l="1882" t="7886" r="2237" b="8030">with</wd>

<space/>

<wd l="2299" t="7886" r="3778" b="8074">passive-aggressive</wd>

<space/>

<wd l="3840" t="7886" r="4445" b="8069">updates</wd>

<space/>

<wd l="4512" t="7906" r="4666" b="8030">to</wd>

<space/>

<wd l="4738" t="7886" r="5318" b="8030">combat</wd>

<space/>

<wd l="5381" t="7886" r="5798" b="8030">Twit-</wd>

</ln>

<ln l="1445" t="8126" r="5798" b="8314" baseLine="8261" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="8146" r="1656" b="8270">ter</wd>

<space/>

<wd l="1699" t="8126" r="2117" b="8270">noise</wd>

<space/>

<wd l="2174" t="8126" r="3139" b="8314">(Derczynski</wd>

<space/>

<wd l="3192" t="8126" r="3475" b="8270">and</wd>

<space/>

<wd l="3518" t="8126" r="4406" b="8294">Bontcheva,</wd>

<space/>

<wd l="4464" t="8126" r="4968" b="8304">2014),</wd>

<space/>

<wd l="5026" t="8126" r="5309" b="8270">and</wd>

<space/>

<wd l="5366" t="8146" r="5798" b="8270">struc-</wd>

</ln>

<ln l="1445" t="8366" r="5798" b="8549" baseLine="8501" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="8366" r="1853" b="8510">tured</wd>

<space/>

<wd l="1958" t="8386" r="2808" b="8549">perceptron</wd>

<space/>

<wd l="2923" t="8366" r="3298" b="8544">(also</wd>

<space/>

<wd l="3403" t="8366" r="3888" b="8510">useful</wd>

<space/>

<wd l="3998" t="8410" r="4195" b="8510">on</wd>

<space/>

<wd l="4301" t="8366" r="4872" b="8510">Twitter</wd>

<space/>

<wd l="4973" t="8366" r="5390" b="8510">noise</wd>

<space/>

<wd l="5510" t="8366" r="5798" b="8544">(Jo-</wd>

</ln>

<ln l="1445" t="8606" r="5813" b="8789" baseLine="8741" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="8606" r="2098" b="8750">hannsen</wd>

<space/>

<wd l="2150" t="8626" r="2294" b="8750">et</wd>

<space/>

<wd l="2347" t="8606" r="2578" b="8774">al.,</wd>

<space/>

<wd l="2640" t="8606" r="3211" b="8784">2014)),</wd>

<space/>

<wd l="3274" t="8606" r="3648" b="8750">CRF</wd>

<space/>

<wd l="3696" t="8606" r="4373" b="8750">L-BFGS</wd>

<space/>

<wd l="4435" t="8606" r="5141" b="8789">provided</wd>

<space/>

<wd l="5194" t="8606" r="5434" b="8750">the</wd>

<space/>

<wd l="5486" t="8606" r="5813" b="8750">best</wd>

<space/>

</ln>

<ln l="1445" t="8842" r="5381" b="9029" baseLine="8981" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="8842" r="2448" b="9024">performance</wd>

<space/>

<wd l="2506" t="8885" r="2698" b="8986">on</wd>

<space/>

<wd l="2755" t="8885" r="3014" b="8986">our</wd>

<space/>

<wd l="3067" t="8842" r="3619" b="8986">dataset</wd>

<space/>

<wd l="3667" t="8842" r="3902" b="8986">for</wd>

<space/>

<wd l="3950" t="8842" r="4190" b="8986">the</wd>

<space/>

<wd l="4243" t="8861" r="4968" b="9029">ten-types</wd>

<space/>

<wd l="5021" t="8842" r="5381" b="8986">task.</wd>

</ln>

</para>

<para l="1445" t="9245" r="3091" b="9427" alignment="left" spaceBefore="165" lsp="exactly" lspExact="235" language="en">

<ln l="1445" t="9245" r="3091" b="9427" baseLine="9379" bold="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="7">

<wd l="1445" t="9245" r="1690" b="9389">3.5</wd>

<space/>

<wd l="1896" t="9245" r="2630" b="9427">Training</wd>

<space/>

<wd l="2678" t="9245" r="3091" b="9389">Data</wd>

</ln>

</para>

<para l="1445" t="9552" r="5813" b="12610" alignment="justified" spaceBefore="61" lsp="exactly" lspExact="240" language="en">

<ln l="1445" t="9552" r="5803" b="9739" baseLine="9691" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="9557" r="1608" b="9691">In</wd>

<space/>

<wd l="1699" t="9595" r="1958" b="9696">our</wd>

<space/>

<wd l="2045" t="9552" r="2395" b="9696">final</wd>

<space/>

<wd l="2491" t="9571" r="3077" b="9739">system,</wd>

<space/>

<wd l="3178" t="9595" r="3408" b="9696">we</wd>

<space/>

<wd l="3494" t="9552" r="4181" b="9696">included</wd>

<space/>

<wd l="4262" t="9552" r="4502" b="9696">the</wd>

<space/>

<wd l="4594" t="9552" r="4867" b="9696">dev</wd>

<space/>

<wd l="4949" t="9552" r="5333" b="9696">2015</wd>

<space/>

<wd l="5434" t="9552" r="5803" b="9720">data,</wd>

<space/>

</ln>

<ln l="1445" t="9792" r="5808" b="9974" baseLine="9931" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="9811" r="1594" b="9936">to</wd>

<space/>

<wd l="1670" t="9792" r="2256" b="9936">combat</wd>

<space/>

<wd l="2323" t="9792" r="2664" b="9936">drift</wd>

<space/>

<wd l="2731" t="9811" r="3307" b="9974">present</wd>

<space/>

<wd l="3374" t="9792" r="3533" b="9931">in</wd>

<space/>

<wd l="3595" t="9792" r="3840" b="9936">the</wd>

<space/>

<wd l="3912" t="9835" r="4478" b="9974">corpus.</wd>

<space/>

<wd l="4603" t="9797" r="4862" b="9936">We</wd>

<space/>

<wd l="4939" t="9792" r="5808" b="9974">anticipated</wd>

<space/>

</ln>

<ln l="1445" t="10032" r="5808" b="10219" baseLine="10166" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="10032" r="1742" b="10176">that</wd>

<space/>

<wd l="1829" t="10032" r="2069" b="10176">the</wd>

<space/>

<wd l="2160" t="10051" r="2438" b="10176">test</wd>

<space/>

<wd l="2534" t="10051" r="2750" b="10176">set</wd>

<space/>

<wd l="2832" t="10032" r="3331" b="10176">would</wd>

<space/>

<wd l="3418" t="10032" r="3605" b="10176">be</wd>

<space/>

<wd l="3691" t="10032" r="4085" b="10176">from</wd>

<space/>

<wd l="4171" t="10032" r="4603" b="10176">2015.</wd>

<space/>

<wd l="4790" t="10032" r="5098" b="10176">The</wd>

<space/>

<wd l="5194" t="10032" r="5808" b="10219">original</wd>

<space/>

</ln>

<ln l="1450" t="10272" r="5803" b="10459" baseLine="10406" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="10272" r="1997" b="10416">dataset</wd>

<space/>

<wd l="2088" t="10315" r="2390" b="10416">was</wd>

<space/>

<wd l="2491" t="10272" r="3254" b="10416">harvested</wd>

<space/>

<wd l="3346" t="10272" r="3499" b="10411">in</wd>

<space/>

<wd l="3595" t="10272" r="4032" b="10440">2010,</wd>

<space/>

<wd l="4147" t="10272" r="4498" b="10459">long</wd>

<space/>

<wd l="4598" t="10272" r="5179" b="10459">enough</wd>

<space/>

<wd l="5275" t="10315" r="5558" b="10459">ago</wd>

<space/>

<wd l="5654" t="10291" r="5803" b="10416">to</wd>

<space/>

</ln>

<ln l="1445" t="10512" r="5808" b="10699" baseLine="10646" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="10512" r="1627" b="10656">be</wd>

<space/>

<wd l="1714" t="10512" r="2789" b="10699">demonstrably</wd>

<space/>

<wd l="2875" t="10512" r="4008" b="10699">disadvantaged</wd>

<space/>

<wd l="4080" t="10512" r="4517" b="10656">when</wd>

<space/>

<wd l="4594" t="10512" r="5376" b="10694">compared</wd>

<space/>

<wd l="5453" t="10512" r="5808" b="10656">with</wd>

<space/>

</ln>

<ln l="1445" t="10747" r="5803" b="10925" baseLine="10886" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="10747" r="2054" b="10891">modern</wd>

<space/>

<wd l="2136" t="10747" r="2467" b="10891">data</wd>

<space/>

<wd l="2554" t="10747" r="3442" b="10925">(Fromreide</wd>

<space/>

<wd l="3528" t="10766" r="3667" b="10891">et</wd>

<space/>

<wd l="3754" t="10747" r="3984" b="10915">al.,</wd>

<space/>

<wd l="4085" t="10747" r="4589" b="10925">2014),</wd>

<space/>

<wd l="4685" t="10747" r="4973" b="10891">and</wd>

<space/>

<wd l="5059" t="10790" r="5222" b="10891">so</wd>

<space/>

<wd l="5309" t="10747" r="5424" b="10891">it</wd>

<space/>

<wd l="5501" t="10790" r="5803" b="10891">was</wd>

<space/>

</ln>

<ln l="1450" t="10987" r="5808" b="11174" baseLine="11126" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="10987" r="1992" b="11131">critical</wd>

<space/>

<wd l="2035" t="11006" r="2189" b="11131">to</wd>

<space/>

<wd l="2232" t="10987" r="2818" b="11131">include</wd>

<space/>

<wd l="2866" t="10987" r="3686" b="11174">something</wd>

<space/>

<wd l="3730" t="11030" r="4176" b="11131">more.</wd>

<space/>

<wd l="4243" t="10987" r="4555" b="11131">The</wd>

<space/>

<wd l="4598" t="11006" r="5534" b="11170">compensate</wd>

<space/>

<wd l="5578" t="10987" r="5808" b="11131">for</wd>

<space/>

</ln>

<ln l="1445" t="11227" r="5808" b="11371" baseLine="11362" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="11227" r="1685" b="11371">the</wd>

<space/>

<wd l="1738" t="11227" r="2035" b="11371">size</wd>

<space/>

<wd l="2083" t="11227" r="2899" b="11371">imbalance</wd>

<space/>

<wd l="2942" t="11314" r="3043" b="11328">–</wd>

<space/>

<wd l="3086" t="11227" r="3326" b="11371">the</wd>

<space/>

<wd l="3379" t="11227" r="3653" b="11371">dev</wd>

<space/>

<wd l="3734" t="11227" r="4118" b="11371">2015</wd>

<space/>

<wd l="4176" t="11227" r="4502" b="11371">data</wd>

<space/>

<wd l="4546" t="11227" r="4670" b="11371">is</wd>

<space/>

<wd l="4723" t="11227" r="5160" b="11371">0.175</wd>

<space/>

<wd l="5213" t="11227" r="5453" b="11371">the</wd>

<space/>

<wd l="5506" t="11227" r="5808" b="11371">size</wd>

<space/>

</ln>

<ln l="1450" t="11467" r="5813" b="11654" baseLine="11602" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="11467" r="1618" b="11611">of</wd>

<space/>

<wd l="1661" t="11467" r="1901" b="11611">the</wd>

<space/>

<wd l="1963" t="11467" r="2352" b="11611">2010</wd>

<space/>

<wd l="2410" t="11467" r="2741" b="11611">data</wd>

<space/>

<wd l="2789" t="11554" r="2890" b="11568">–</wd>

<space/>

<wd l="2942" t="11510" r="3173" b="11611">we</wd>

<space/>

<wd l="3226" t="11467" r="3960" b="11654">weighted</wd>

<space/>

<wd l="4013" t="11467" r="4450" b="11611">down</wd>

<space/>

<wd l="4498" t="11467" r="4738" b="11611">the</wd>

<space/>

<wd l="4800" t="11467" r="5203" b="11611">older</wd>

<space/>

<wd l="5261" t="11467" r="5813" b="11611">dataset</wd>

<space/>

</ln>

<ln l="1445" t="11707" r="5798" b="11894" baseLine="11842" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="11726" r="1594" b="11851">to</wd>

<space/>

<wd l="1646" t="11707" r="1838" b="11894">by</wd>

<space/>

<wd l="1896" t="11707" r="2184" b="11875">0.7,</wd>

<space/>

<wd l="2242" t="11750" r="2395" b="11851">as</wd>

<space/>

<wd l="2458" t="11707" r="3235" b="11894">suggested</wd>

<space/>

<wd l="3283" t="11707" r="3480" b="11894">by</wd>

<space/>

<wd l="3538" t="11707" r="4142" b="11894">(Cherry</wd>

<space/>

<wd l="4200" t="11707" r="4483" b="11851">and</wd>

<space/>

<wd l="4536" t="11707" r="4915" b="11875">Guo,</wd>

<space/>

<wd l="4973" t="11707" r="5477" b="11885">2015),</wd>

<space/>

<wd l="5530" t="11707" r="5798" b="11846">im-</wd>

</ln>

<ln l="1445" t="11947" r="5798" b="12134" baseLine="12082" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="11947" r="2275" b="12130">plemented</wd>

<space/>

<wd l="2323" t="11947" r="2520" b="12134">by</wd>

<space/>

<wd l="2578" t="11947" r="3370" b="12134">uniformly</wd>

<space/>

<wd l="3437" t="11947" r="3989" b="12134">scaling</wd>

<space/>

<wd l="4042" t="11947" r="4843" b="12091">individual</wd>

<space/>

<wd l="4896" t="11947" r="5448" b="12091">feature</wd>

<space/>

<wd l="5506" t="11947" r="5798" b="12091">val-</wd>

</ln>

<ln l="1445" t="12182" r="5808" b="12370" baseLine="12322" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="12226" r="1704" b="12326">ues</wd>

<space/>

<wd l="1781" t="12226" r="1973" b="12326">on</wd>

<space/>

<wd l="2045" t="12182" r="2453" b="12326">older</wd>

<space/>

<wd l="2515" t="12182" r="3283" b="12326">instances.</wd>

<space/>

<wd l="3408" t="12182" r="3754" b="12326">This</wd>

<space/>

<wd l="3835" t="12182" r="4800" b="12370">successfully</wd>

<space/>

<wd l="4867" t="12182" r="5501" b="12326">reduced</wd>

<space/>

<wd l="5563" t="12182" r="5808" b="12326">the</wd>

<space/>

</ln>

<ln l="1445" t="12422" r="4469" b="12610" baseLine="12562" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="12422" r="2102" b="12610">negative</wd>

<space/>

<wd l="2155" t="12422" r="2702" b="12605">impact</wd>

<space/>

<wd l="2750" t="12422" r="2923" b="12566">of</wd>

<space/>

<wd l="2962" t="12422" r="3206" b="12566">the</wd>

<space/>

<wd l="3259" t="12422" r="4037" b="12566">inevitable</wd>

<space/>

<wd l="4094" t="12422" r="4469" b="12566">drift.</wd>

</ln>

</para>

<para l="1445" t="12850" r="3120" b="13022" alignment="left" spaceBefore="191" lsp="exactly" lspExact="274" language="en">

<ln l="1445" t="12850" r="3120" b="13022" baseLine="13018" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="11">

<wd l="1445" t="12850" r="1560" b="13018">4</wd>

<space/>

<wd l="1800" t="12850" r="3120" b="13022">Performance</wd>

</ln>

</para>

<para l="1445" t="13253" r="5808" b="14875" alignment="justified" spaceBefore="115" lsp="exactly" lspExact="240" language="en">

<ln l="1450" t="13253" r="5808" b="13440" baseLine="13387" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1450" t="13253" r="1752" b="13397">Our</wd>

<space/>

<wd l="1819" t="13253" r="2333" b="13397">results</wd>

<space/>

<wd l="2410" t="13296" r="2650" b="13397">are</wd>

<space/>

<wd l="2722" t="13253" r="3154" b="13440">given</wd>

<space/>

<wd l="3216" t="13253" r="3374" b="13392">in</wd>

<space/>

<wd l="3437" t="13253" r="3874" b="13397">Table</wd>

<space/>

<wd l="3946" t="13253" r="4080" b="13397">2.</wd>

<space/>

<wd l="4205" t="13258" r="4421" b="13397">As</wd>

<space/>

<wd l="4498" t="13296" r="4766" b="13397">can</wd>

<space/>

<wd l="4834" t="13253" r="5021" b="13397">be</wd>

<space/>

<wd l="5098" t="13296" r="5486" b="13421">seen,</wd>

<space/>

<wd l="5563" t="13253" r="5808" b="13397">the</wd>

<space/>

</ln>

<ln l="1445" t="13493" r="5808" b="13680" baseLine="13627" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="13493" r="1766" b="13637">best</wd>

<space/>

<wd l="1834" t="13493" r="2347" b="13637">results</wd>

<space/>

<wd l="2419" t="13536" r="2803" b="13637">were</wd>

<space/>

<wd l="2880" t="13493" r="3576" b="13637">achieved</wd>

<space/>

<wd l="3643" t="13493" r="3878" b="13637">for</wd>

<space/>

<wd l="3946" t="13493" r="4186" b="13637">the</wd>

<space/>

<wd l="4258" t="13536" r="4790" b="13675">person</wd>

<space/>

<wd l="4858" t="13493" r="5146" b="13637">and</wd>

<space/>

<wd l="5218" t="13493" r="5808" b="13680">geo-loc</wd>

<space/>

</ln>

<ln l="1450" t="13733" r="5808" b="13920" baseLine="13867" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1450" t="13733" r="1891" b="13920">entity</wd>

<space/>

<wd l="1963" t="13752" r="2424" b="13920">types.</wd>

<space/>

<wd l="2549" t="13738" r="2674" b="13877">It</wd>

<space/>

<wd l="2741" t="13733" r="2866" b="13877">is</wd>

<space/>

<wd l="2942" t="13733" r="3254" b="13877">also</wd>

<space/>

<wd l="3326" t="13733" r="3792" b="13877">worth</wd>

<space/>

<wd l="3859" t="13733" r="4363" b="13920">noting</wd>

<space/>

<wd l="4435" t="13733" r="4738" b="13877">that</wd>

<space/>

<wd l="4800" t="13733" r="5808" b="13915">performance</wd>

<space/>

</ln>

<ln l="1450" t="13973" r="5798" b="14160" baseLine="14107" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1450" t="14016" r="1642" b="14117">on</wd>

<space/>

<wd l="1680" t="13973" r="1920" b="14117">the</wd>

<space/>

<wd l="1963" t="13992" r="2578" b="14160">notypes</wd>

<space/>

<wd l="2621" t="13973" r="2942" b="14117">task</wd>

<space/>

<wd l="2981" t="13973" r="3110" b="14117">is</wd>

<space/>

<wd l="3163" t="13973" r="4133" b="14160">significantly</wd>

<space/>

<wd l="4176" t="13973" r="4632" b="14117">better</wd>

<space/>

<wd l="4675" t="14016" r="5160" b="14117">across</wd>

<space/>

<wd l="5208" t="13973" r="5395" b="14117">all</wd>

<space/>

<wd l="5443" t="13992" r="5798" b="14117">met-</wd>

</ln>

<ln l="1445" t="14208" r="5798" b="14395" baseLine="14347" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="14208" r="1776" b="14376">rics,</wd>

<space/>

<wd l="1829" t="14208" r="2314" b="14352">which</wd>

<space/>

<wd l="2362" t="14208" r="3062" b="14352">indicates</wd>

<space/>

<wd l="3115" t="14208" r="3418" b="14352">that</wd>

<space/>

<wd l="3461" t="14208" r="3701" b="14352">the</wd>

<space/>

<wd l="3763" t="14227" r="4310" b="14395">system</wd>

<space/>

<wd l="4354" t="14208" r="4478" b="14352">is</wd>

<space/>

<wd l="4536" t="14208" r="5136" b="14390">capable</wd>

<space/>

<wd l="5189" t="14208" r="5362" b="14352">of</wd>

<space/>

<wd l="5400" t="14208" r="5798" b="14352">iden-</wd>

</ln>

<ln l="1445" t="14448" r="5808" b="14635" baseLine="14587" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="14448" r="1973" b="14635">tifying</wd>

<space/>

<wd l="2074" t="14448" r="2635" b="14592">entities</wd>

<space/>

<wd l="2741" t="14448" r="3475" b="14635">correctly,</wd>

<space/>

<wd l="3586" t="14448" r="3840" b="14592">but</wd>

<space/>

<wd l="3931" t="14467" r="4786" b="14592">encounters</wd>

<space/>

<wd l="4886" t="14448" r="5357" b="14592">issues</wd>

<space/>

<wd l="5453" t="14448" r="5808" b="14592">with</wd>

<space/>

</ln>

<ln l="1445" t="14688" r="3331" b="14875" baseLine="14822" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="14688" r="1810" b="14832">their</wd>

<space/>

<wd l="1858" t="14707" r="2198" b="14875">type</wd>

<space/>

<wd l="2256" t="14688" r="3331" b="14832">classification.</wd>

</ln>

</para>

<para l="1445" t="14928" r="5798" b="15355" alignment="justified" fli="216" lsp="exactly" lspExact="237" language="en">

<ln l="1642" t="14928" r="5798" b="15115" baseLine="15062" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="14933" r="1901" b="15072">We</wd>

<space/>

<wd l="1968" t="14928" r="2434" b="15072">found</wd>

<space/>

<wd l="2501" t="14928" r="2803" b="15072">that</wd>

<space/>

<wd l="2866" t="14928" r="3106" b="15072">the</wd>

<space/>

<wd l="3178" t="14928" r="3754" b="15115">biggest</wd>

<space/>

<wd l="3826" t="14928" r="4862" b="15072">contributions</wd>

<space/>

<wd l="4934" t="14947" r="5083" b="15072">to</wd>

<space/>

<wd l="5160" t="14971" r="5419" b="15072">our</wd>

<space/>

<wd l="5496" t="14971" r="5798" b="15115">sys-</wd>

</ln>

<ln l="1445" t="15168" r="5798" b="15355" baseLine="15302" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="15168" r="1867" b="15312">tem’s</wd>

<space/>

<wd l="1968" t="15168" r="2971" b="15350">performance</wd>

<space/>

<wd l="3067" t="15211" r="3451" b="15312">were</wd>

<space/>

<wd l="3547" t="15168" r="3792" b="15312">the</wd>

<space/>

<wd l="3888" t="15168" r="4594" b="15312">Freebase</wd>

<space/>

<wd l="4690" t="15187" r="5405" b="15355">gazetteer</wd>

<space/>

<wd l="5501" t="15168" r="5798" b="15312">fea-</wd>

</ln>

<ln l="0" t="0" r="0" b="0" baseLine="0" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<space/>

</ln>

</para>

</column>

<column l="6147" t="1296" r="10529" b="15363">

<para l="6149" t="1363" r="10517" b="2506" alignment="justified" spaceBefore="11" lsp="exactly" lspExact="239" language="en">

<ln l="6149" t="1363" r="10512" b="1550" baseLine="1498">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6149" t="1382" r="6581" b="1531">tures,</wd>

<space/>

<wd l="6648" t="1363" r="6931" b="1507">and</wd>

<space/>

<wd l="6989" t="1363" r="7416" b="1550">using</wd>

<space/>

<wd l="7474" t="1368" r="8016" b="1507">Brown</wd>

<space/>

<wd l="8074" t="1363" r="8674" b="1507">clusters</wd>

<space/>

<wd l="8736" t="1363" r="9091" b="1507">with</wd>

<space/>

<wd l="9144" t="1363" r="9499" b="1550">high</wd>

<space/>

<wd l="9557" t="1363" r="10056" b="1507">values</wd>

<space/>

<wd l="10123" t="1363" r="10291" b="1507">of</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><wd l="10344" t="1411" r="10512" b="1507">m</wd>

<space/>

</run>

</ln>

<ln l="6158" t="1603" r="10507" b="1790" baseLine="1738" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6158" t="1603" r="6456" b="1781">(the</wd>

<space/>

<wd l="6504" t="1603" r="7114" b="1747">number</wd>

<space/>

<wd l="7166" t="1603" r="7339" b="1747">of</wd>

<space/>

<wd l="7382" t="1603" r="7987" b="1781">classes)</wd>

<space/>

<wd l="8050" t="1603" r="8333" b="1747">and</wd>

<space/>

<wd l="8381" t="1603" r="8770" b="1790">large</wd>

<space/>

<wd l="8827" t="1622" r="9490" b="1747">amounts</wd>

<space/>

<wd l="9547" t="1603" r="9720" b="1747">of</wd>

<space/>

<wd l="9758" t="1622" r="10248" b="1747">recent</wd>

<space/>

<wd l="10291" t="1603" r="10507" b="1742">in-</wd>

</ln>

<ln l="6149" t="1838" r="10517" b="2021" baseLine="1978" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="1858" r="6403" b="2021">put</wd>

<space/>

<wd l="6466" t="1838" r="6830" b="1982">data.</wd>

<space/>

<wd l="6922" t="1838" r="7272" b="1982">This</wd>

<space/>

<wd l="7334" t="1838" r="7579" b="1982">led</wd>

<space/>

<wd l="7642" t="1882" r="7901" b="1982">our</wd>

<space/>

<wd l="7963" t="1838" r="9096" b="2021">computational</wd>

<space/>

<wd l="9158" t="1838" r="9662" b="1982">efforts</wd>

<space/>

<wd l="9725" t="1838" r="9883" b="1978">in</wd>

<space/>

<wd l="9936" t="1838" r="10176" b="1982">the</wd>

<space/>

<wd l="10238" t="1838" r="10517" b="1982">last</wd>

<space/>

</ln>

<ln l="6149" t="2078" r="10517" b="2266" baseLine="2218" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="2078" r="6571" b="2222">week</wd>

<space/>

<wd l="6648" t="2098" r="6797" b="2222">to</wd>

<space/>

<wd l="6878" t="2078" r="7066" b="2222">be</wd>

<space/>

<wd l="7142" t="2078" r="7598" b="2222">based</wd>

<space/>

<wd l="7680" t="2078" r="8227" b="2222">around</wd>

<space/>

<wd l="8304" t="2078" r="8923" b="2266">running</wd>

<space/>

<wd l="9005" t="2078" r="9245" b="2222">the</wd>

<space/>

<wd l="9322" t="2078" r="9902" b="2266">biggest</wd>

<space/>

<wd l="9974" t="2083" r="10517" b="2222">Brown</wd>

<space/>

</ln>

<ln l="6154" t="2318" r="9077" b="2506" baseLine="2453" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6154" t="2318" r="6931" b="2506">clustering</wd>

<space/>

<wd l="6984" t="2318" r="7306" b="2462">task</wd>

<space/>

<wd l="7354" t="2318" r="7656" b="2462">that</wd>

<space/>

<wd l="7704" t="2362" r="7934" b="2462">we</wd>

<space/>

<wd l="7992" t="2318" r="8429" b="2462">could</wd>

<space/>

<wd l="8477" t="2318" r="8635" b="2458">in</wd>

<space/>

<wd l="8683" t="2318" r="9077" b="2462">time.</wd>

</ln>

</para>

<para l="6149" t="2558" r="10517" b="4896" alignment="justified" fli="144" lsp="exactly" lspExact="239" language="en">

<ln l="6346" t="2558" r="10507" b="2746" baseLine="2693" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6346" t="2563" r="6605" b="2702">We</wd>

<space/>

<wd l="6706" t="2558" r="7018" b="2702">also</wd>

<space/>

<wd l="7114" t="2558" r="7555" b="2702">noted</wd>

<space/>

<wd l="7651" t="2558" r="8165" b="2746">during</wd>

<space/>

<wd l="8261" t="2558" r="8789" b="2746">testing</wd>

<space/>

<wd l="8885" t="2558" r="9226" b="2726">that,</wd>

<space/>

<wd l="9336" t="2558" r="9773" b="2702">while</wd>

<space/>

<wd l="9869" t="2558" r="10507" b="2741">passive-</wd>

</ln>

<ln l="6154" t="2798" r="10507" b="2986" baseLine="2933" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6154" t="2798" r="6979" b="2986">aggressive</wd>

<space/>

<wd l="7075" t="2798" r="7445" b="2942">CRF</wd>

<space/>

<wd l="7531" t="2798" r="8136" b="2981">updates</wd>

<space/>

<wd l="8227" t="2798" r="8760" b="2981">helped</wd>

<space/>

<wd l="8846" t="2798" r="9202" b="2942">with</wd>

<space/>

<wd l="9298" t="2798" r="10171" b="2986">single-type</wd>

<space/>

<wd l="10262" t="2842" r="10507" b="2942">en-</wd>

</ln>

<ln l="6149" t="3034" r="10507" b="3221" baseLine="3173" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="3034" r="6413" b="3221">tity</wd>

<space/>

<wd l="6475" t="3034" r="7387" b="3221">recognition</wd>

<space/>

<wd l="7445" t="3034" r="7603" b="3173">in</wd>

<space/>

<wd l="7666" t="3053" r="8170" b="3178">tweets</wd>

<space/>

<wd l="8246" t="3034" r="9206" b="3221">(Derczynski</wd>

<space/>

<wd l="9278" t="3034" r="9562" b="3178">and</wd>

<space/>

<wd l="9624" t="3034" r="10507" b="3202">Bontcheva,</wd>

<space/>

</ln>

<ln l="6154" t="3274" r="10517" b="3461" baseLine="3413" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6154" t="3274" r="6658" b="3451">2014),</wd>

<space/>

<wd l="6758" t="3274" r="6869" b="3418">it</wd>

<space/>

<wd l="6950" t="3317" r="7253" b="3418">was</wd>

<space/>

<wd l="7349" t="3274" r="8251" b="3418">detrimental</wd>

<space/>

<wd l="8338" t="3293" r="8491" b="3418">to</wd>

<space/>

<wd l="8582" t="3317" r="8760" b="3418">an</wd>

<space/>

<wd l="8856" t="3274" r="9533" b="3461">all-types</wd>

<space/>

<wd l="9629" t="3293" r="10214" b="3461">system.</wd>

<space/>

<wd l="10392" t="3278" r="10517" b="3418">It</wd>

<space/>

</ln>

<ln l="6149" t="3514" r="10507" b="3701" baseLine="3648" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="3557" r="6451" b="3658">was</wd>

<space/>

<wd l="6538" t="3514" r="6850" b="3658">also</wd>

<space/>

<wd l="6931" t="3533" r="7190" b="3658">not</wd>

<space/>

<wd l="7267" t="3514" r="7829" b="3696">helpful</wd>

<space/>

<wd l="7910" t="3514" r="8141" b="3658">for</wd>

<space/>

<wd l="8218" t="3514" r="8462" b="3658">the</wd>

<space/>

<wd l="8539" t="3538" r="9221" b="3701">no-types</wd>

<space/>

<wd l="9307" t="3514" r="9667" b="3682">task,</wd>

<space/>

<wd l="9758" t="3514" r="10243" b="3658">where</wd>

<space/>

<wd l="10325" t="3518" r="10507" b="3653">L-</wd>

</ln>

<ln l="6149" t="3754" r="10507" b="3941" baseLine="3888" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="3754" r="6638" b="3898">BFGS</wd>

<space/>

<wd l="6706" t="3754" r="7310" b="3936">updates</wd>

<space/>

<wd l="7382" t="3754" r="7810" b="3941">again</wd>

<space/>

<wd l="7872" t="3797" r="8237" b="3941">gave</wd>

<space/>

<wd l="8299" t="3754" r="8755" b="3898">better</wd>

<space/>

<wd l="8813" t="3754" r="9859" b="3936">performance.</wd>

<space/>

<wd l="9965" t="3754" r="10315" b="3898">This</wd>

<space/>

<wd l="10382" t="3754" r="10507" b="3898">is</wd>

<space/>

</ln>

<ln l="6149" t="3994" r="10507" b="4181" baseLine="4128" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="3994" r="6797" b="4138">rational:</wd>

<space/>

<wd l="6902" t="3994" r="7142" b="4138">the</wd>

<space/>

<wd l="7214" t="3994" r="7891" b="4181">all-types</wd>

<space/>

<wd l="7963" t="3994" r="8246" b="4138">and</wd>

<space/>

<wd l="8309" t="3994" r="9456" b="4181">multiple-types</wd>

<space/>

<wd l="9523" t="3994" r="9917" b="4138">tasks</wd>

<space/>

<wd l="9989" t="4037" r="10229" b="4138">are</wd>

<space/>

<wd l="10296" t="3994" r="10507" b="4138">ef-</wd>

</ln>

<ln l="6149" t="4234" r="10507" b="4421" baseLine="4368" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="4234" r="6835" b="4421">fectively</wd>

<space/>

<wd l="6898" t="4234" r="7445" b="4378">similar</wd>

<space/>

<wd l="7493" t="4234" r="7930" b="4378">when</wd>

<space/>

<wd l="7982" t="4234" r="8798" b="4378">contrasted</wd>

<space/>

<wd l="8851" t="4234" r="9206" b="4378">with</wd>

<space/>

<wd l="9254" t="4234" r="9494" b="4378">the</wd>

<space/>

<wd l="9562" t="4234" r="10507" b="4421">single-types</wd>

<space/>

</ln>

<ln l="6149" t="4469" r="10507" b="4656" baseLine="4608" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="4469" r="6514" b="4637">task,</wd>

<space/>

<wd l="6590" t="4469" r="6744" b="4608">in</wd>

<space/>

<wd l="6806" t="4469" r="7109" b="4613">that</wd>

<space/>

<wd l="7171" t="4469" r="7507" b="4656">they</wd>

<space/>

<wd l="7579" t="4469" r="8141" b="4651">require</wd>

<space/>

<wd l="8208" t="4469" r="8448" b="4613">the</wd>

<space/>

<wd l="8515" t="4469" r="9427" b="4656">recognition</wd>

<space/>

<wd l="9494" t="4469" r="9667" b="4613">of</wd>

<space/>

<wd l="9720" t="4512" r="10157" b="4656">many</wd>

<space/>

<wd l="10229" t="4469" r="10507" b="4613">dif-</wd>

</ln>

<ln l="6149" t="4709" r="8419" b="4896" baseLine="4848" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="4709" r="6614" b="4853">ferent</wd>

<space/>

<wd l="6662" t="4709" r="7090" b="4853">kinds</wd>

<space/>

<wd l="7147" t="4709" r="7320" b="4853">of</wd>

<space/>

<wd l="7358" t="4709" r="7891" b="4853">named</wd>

<space/>

<wd l="7944" t="4709" r="8419" b="4896">entity.</wd>

</ln>

</para>

<para l="6149" t="4949" r="10517" b="7037" alignment="justified" fli="144" lsp="exactly" lspExact="239" language="en">

<ln l="6346" t="4949" r="10517" b="5136" baseLine="5083" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6346" t="4949" r="6941" b="5136">Finally,</wd>

<space/>

<wd l="6998" t="4992" r="7229" b="5093">we</wd>

<space/>

<wd l="7277" t="4949" r="7742" b="5093">found</wd>

<space/>

<wd l="7790" t="4949" r="8088" b="5093">that</wd>

<space/>

<wd l="8141" t="4949" r="8544" b="5093">other</wd>

<space/>

<wd l="8597" t="4968" r="9312" b="5136">gazetteer</wd>

<space/>

<wd l="9355" t="4968" r="9773" b="5136">types</wd>

<space/>

<wd l="9826" t="4992" r="10210" b="5093">were</wd>

<space/>

<wd l="10258" t="4968" r="10517" b="5093">not</wd>

<space/>

</ln>

<ln l="6149" t="5189" r="10512" b="5376" baseLine="5323" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="5189" r="6710" b="5371">helpful</wd>

<space/>

<wd l="6782" t="5208" r="6936" b="5333">to</wd>

<space/>

<wd l="7008" t="5189" r="8064" b="5371">performance;</wd>

<space/>

<wd l="8150" t="5189" r="8645" b="5376">taking</wd>

<space/>

<wd l="8722" t="5189" r="8952" b="5333">for</wd>

<space/>

<wd l="9029" t="5189" r="9691" b="5371">example</wd>

<space/>

<wd l="9768" t="5189" r="9955" b="5333">all</wd>

<space/>

<wd l="10037" t="5189" r="10210" b="5333">of</wd>

<space/>

<wd l="10272" t="5189" r="10512" b="5333">the</wd>

<space/>

</ln>

<ln l="6149" t="5429" r="10507" b="5616" baseLine="5563" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="5434" r="6768" b="5573">ANNIE</wd>

<space/>

<wd l="6835" t="5448" r="7670" b="5616">gazetteers,</wd>

<space/>

<wd l="7752" t="5448" r="8534" b="5616">gazetteers</wd>

<space/>

<wd l="8606" t="5429" r="8995" b="5573">from</wd>

<space/>

<wd l="9058" t="5429" r="9542" b="5573">IMDb</wd>

<space/>

<wd l="9614" t="5429" r="10186" b="5611">dumps,</wd>

<space/>

<wd l="10262" t="5472" r="10507" b="5573">en-</wd>

</ln>

<ln l="6149" t="5664" r="10507" b="5851" baseLine="5803" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="5664" r="6413" b="5851">tity</wd>

<space/>

<wd l="6475" t="5707" r="6979" b="5808">names</wd>

<space/>

<wd l="7051" t="5664" r="7776" b="5808">extracted</wd>

<space/>

<wd l="7838" t="5664" r="8227" b="5808">from</wd>

<space/>

<wd l="8294" t="5664" r="8698" b="5808">other</wd>

<space/>

<wd l="8760" t="5664" r="9331" b="5808">Twitter</wd>

<space/>

<wd l="9394" t="5669" r="9792" b="5808">NER</wd>

<space/>

<wd l="9859" t="5707" r="10507" b="5846">corpora,</wd>

<space/>

</ln>

<ln l="6154" t="5904" r="10507" b="6091" baseLine="6043" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6154" t="5947" r="6317" b="6048">or</wd>

<space/>

<wd l="6360" t="5904" r="6926" b="6048">entities</wd>

<space/>

<wd l="6979" t="5904" r="7747" b="6091">generated</wd>

<space/>

<wd l="7790" t="5904" r="8410" b="6091">through</wd>

<space/>

<wd l="8448" t="5909" r="8976" b="6043">LLDA</wd>

<space/>

<wd l="9024" t="5904" r="9538" b="6082">(Ritter</wd>

<space/>

<wd l="9581" t="5923" r="9725" b="6048">et</wd>

<space/>

<wd l="9768" t="5904" r="9998" b="6072">al.,</wd>

<space/>

<wd l="10056" t="5904" r="10507" b="6082">2011)</wd>

<space/>

</ln>

<ln l="6154" t="6144" r="10512" b="6326" baseLine="6278" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6154" t="6144" r="6346" b="6288">all</wd>

<space/>

<wd l="6422" t="6144" r="7205" b="6288">decreased</wd>

<space/>

<wd l="7272" t="6144" r="8318" b="6326">performance.</wd>

<space/>

<wd l="8453" t="6149" r="8712" b="6288">We</wd>

<space/>

<wd l="8794" t="6163" r="9374" b="6326">suspect</wd>

<space/>

<wd l="9442" t="6144" r="9720" b="6288">this</wd>

<space/>

<wd l="9797" t="6144" r="9926" b="6288">is</wd>

<space/>

<wd l="10008" t="6144" r="10286" b="6288">due</wd>

<space/>

<wd l="10358" t="6163" r="10512" b="6288">to</wd>

<space/>

</ln>

<ln l="6149" t="6384" r="10512" b="6571" baseLine="6518" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="6384" r="6514" b="6528">their</wd>

<space/>

<wd l="6610" t="6384" r="7416" b="6571">swamping</wd>

<space/>

<wd l="7512" t="6384" r="8587" b="6571">already-small</wd>

<space/>

<wd l="8678" t="6384" r="9091" b="6566">input</wd>

<space/>

<wd l="9182" t="6384" r="9730" b="6528">dataset</wd>

<space/>

<wd l="9816" t="6384" r="10171" b="6528">with</wd>

<space/>

<wd l="10258" t="6403" r="10512" b="6528">too</wd>

<space/>

</ln>

<ln l="6154" t="6624" r="10507" b="6811" baseLine="6758" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6154" t="6643" r="6547" b="6811">great</wd>

<space/>

<wd l="6634" t="6667" r="6720" b="6768">a</wd>

<space/>

<wd l="6797" t="6624" r="7560" b="6806">profusion</wd>

<space/>

<wd l="7646" t="6624" r="7819" b="6768">of</wd>

<space/>

<wd l="7886" t="6624" r="8870" b="6792">information,</wd>

<space/>

<wd l="8971" t="6624" r="9211" b="6768">c.f.</wd>

<space/>

<wd l="9307" t="6624" r="9778" b="6768">Smith</wd>

<space/>

<wd l="9864" t="6624" r="10147" b="6768">and</wd>

<space/>

<wd l="10229" t="6624" r="10507" b="6768">Os-</wd>

</ln>

<ln l="6149" t="6859" r="7219" b="7037" baseLine="6998" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="6859" r="6600" b="7003">borne</wd>

<space/>

<wd l="6662" t="6859" r="7219" b="7037">(2006).</wd>

</ln>

</para>

<para l="6149" t="7099" r="10517" b="8722" alignment="justified" fli="144" lsp="exactly" lspExact="238" language="en">

<ln l="6346" t="7099" r="10517" b="7286" baseLine="7238" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6346" t="7104" r="6514" b="7238">In</wd>

<space/>

<wd l="6557" t="7099" r="7248" b="7267">addition,</wd>

<space/>

<wd l="7296" t="7142" r="7526" b="7243">we</wd>

<space/>

<wd l="7565" t="7099" r="7934" b="7243">tried</wd>

<space/>

<wd l="7973" t="7099" r="8808" b="7286">generating</wd>

<space/>

<wd l="8856" t="7099" r="10142" b="7282">semi-supervised</wd>

<space/>

<wd l="10186" t="7099" r="10517" b="7243">data</wd>

<space/>

</ln>

<ln l="6149" t="7339" r="10517" b="7526" baseLine="7474" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="7339" r="6576" b="7526">using</wd>

<space/>

<wd l="6638" t="7339" r="7963" b="7483">vote-constrained</wd>

<space/>

<wd l="8026" t="7339" r="9163" b="7526">bootstrapping,</wd>

<space/>

<wd l="9230" t="7339" r="9485" b="7483">but</wd>

<space/>

<wd l="9542" t="7339" r="9826" b="7483">this</wd>

<space/>

<wd l="9893" t="7382" r="10195" b="7483">was</wd>

<space/>

<wd l="10258" t="7358" r="10517" b="7483">not</wd>

<space/>

</ln>

<ln l="6149" t="7579" r="10507" b="7766" baseLine="7714" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="7579" r="6710" b="7762">helpful</wd>

<space/>

<wd l="6763" t="7579" r="7210" b="7723">either</wd>

<space/>

<wd l="7258" t="7666" r="7358" b="7680">–</wd>

<space/>

<wd l="7402" t="7579" r="8328" b="7766">presumably</wd>

<space/>

<wd l="8381" t="7579" r="8659" b="7723">due</wd>

<space/>

<wd l="8707" t="7598" r="8856" b="7723">to</wd>

<space/>

<wd l="8904" t="7579" r="9149" b="7723">the</wd>

<space/>

<wd l="9192" t="7579" r="9811" b="7766">initially</wd>

<space/>

<wd l="9859" t="7579" r="10147" b="7723">low</wd>

<space/>

<wd l="10195" t="7622" r="10507" b="7762">per-</wd>

</ln>

<ln l="6149" t="7819" r="10517" b="8006" baseLine="7954" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="7819" r="6898" b="7963">formance</wd>

<space/>

<wd l="6979" t="7819" r="7152" b="7963">of</wd>

<space/>

<wd l="7214" t="7819" r="8606" b="8006">machine-learning</wd>

<space/>

<wd l="8683" t="7819" r="9134" b="7963">based</wd>

<space/>

<wd l="9211" t="7819" r="9590" b="7963">tools</wd>

<space/>

<wd l="9677" t="7862" r="9869" b="7963">on</wd>

<space/>

<wd l="9941" t="7819" r="10517" b="7963">Twitter</wd>

<space/>

</ln>

<ln l="6149" t="8059" r="10507" b="8246" baseLine="8194" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="8064" r="6547" b="8203">NER</wd>

<space/>

<wd l="6605" t="8059" r="7200" b="8246">making</wd>

<space/>

<wd l="7262" t="8059" r="7373" b="8203">it</wd>

<space/>

<wd l="7430" t="8059" r="7786" b="8203">hard</wd>

<space/>

<wd l="7843" t="8078" r="7997" b="8203">to</wd>

<space/>

<wd l="8064" t="8059" r="8678" b="8242">develop</wd>

<space/>

<wd l="8750" t="8059" r="10037" b="8242">semi-supervised</wd>

<space/>

<wd l="10094" t="8059" r="10507" b="8203">boot-</wd>

</ln>

<ln l="6158" t="8294" r="10507" b="8482" baseLine="8434" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6158" t="8294" r="6826" b="8477">strapped</wd>

<space/>

<wd l="6883" t="8294" r="7498" b="8482">training</wd>

<space/>

<wd l="7565" t="8294" r="7934" b="8462">data,</wd>

<space/>

<wd l="8002" t="8338" r="8198" b="8438">no</wd>

<space/>

<wd l="8261" t="8314" r="8770" b="8438">matter</wd>

<space/>

<wd l="8827" t="8294" r="9158" b="8438">how</wd>

<space/>

<wd l="9235" t="8294" r="9922" b="8482">stringent</wd>

<space/>

<wd l="9979" t="8294" r="10219" b="8438">the</wd>

<space/>

<wd l="10286" t="8294" r="10507" b="8434">fil-</wd>

</ln>

<ln l="6149" t="8534" r="8832" b="8722" baseLine="8674" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="8534" r="6610" b="8722">tering</wd>

<space/>

<wd l="6667" t="8534" r="6840" b="8678">of</wd>

<space/>

<wd l="6883" t="8534" r="7997" b="8722">autogenerated</wd>

<space/>

<wd l="8050" t="8534" r="8832" b="8717">examples.</wd>

</ln>

</para>

<para l="6149" t="8774" r="10517" b="11549" alignment="justified" fli="144" lsp="exactly" lspExact="239" language="en">

<ln l="6346" t="8774" r="10517" b="8942" baseLine="8909" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6346" t="8779" r="6624" b="8918">For</wd>

<space/>

<wd l="6710" t="8774" r="6955" b="8918">the</wd>

<space/>

<wd l="7051" t="8774" r="7397" b="8918">final</wd>

<space/>

<wd l="7488" t="8818" r="7800" b="8942">run,</wd>

<space/>

<wd l="7906" t="8818" r="8136" b="8918">we</wd>

<space/>

<wd l="8227" t="8818" r="8616" b="8918">were</wd>

<space/>

<wd l="8707" t="8774" r="9139" b="8918">faced</wd>

<space/>

<wd l="9226" t="8774" r="9581" b="8918">with</wd>

<space/>

<wd l="9677" t="8818" r="9763" b="8918">a</wd>

<space/>

<wd l="9854" t="8774" r="10517" b="8918">decision</wd>

<space/>

</ln>

<ln l="6154" t="9014" r="10517" b="9202" baseLine="9149" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6154" t="9014" r="6595" b="9158">about</wd>

<space/>

<wd l="6667" t="9014" r="7176" b="9202">fitting.</wd>

<space/>

<wd l="7306" t="9019" r="7565" b="9158">We</wd>

<space/>

<wd l="7642" t="9014" r="8078" b="9158">could</wd>

<space/>

<wd l="8155" t="9014" r="8602" b="9158">either</wd>

<space/>

<wd l="8674" t="9014" r="9221" b="9158">choose</wd>

<space/>

<wd l="9298" t="9058" r="9384" b="9158">a</wd>

<space/>

<wd l="9456" t="9014" r="10517" b="9202">configuration</wd>

<space/>

</ln>

<ln l="6149" t="9254" r="10507" b="9442" baseLine="9389" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="9254" r="6451" b="9398">that</wd>

<space/>

<wd l="6509" t="9254" r="7354" b="9398">minimised</wd>

<space/>

<wd l="7416" t="9254" r="8030" b="9442">training</wd>

<space/>

<wd l="8102" t="9254" r="8405" b="9398">loss</wd>

<space/>

<wd l="8477" t="9298" r="8674" b="9398">on</wd>

<space/>

<wd l="8741" t="9254" r="8928" b="9398">all</wd>

<space/>

<wd l="9000" t="9254" r="9240" b="9398">the</wd>

<space/>

<wd l="9312" t="9254" r="10018" b="9398">available</wd>

<space/>

<wd l="10080" t="9254" r="10507" b="9398">train-</wd>

</ln>

<ln l="6149" t="9490" r="10507" b="9677" baseLine="9629" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="9490" r="6398" b="9677">ing</wd>

<space/>

<wd l="6480" t="9490" r="6811" b="9634">data</wd>

<space/>

<wd l="6888" t="9490" r="7310" b="9667">(train</wd>

<space/>

<wd l="7387" t="9528" r="7493" b="9629">+</wd>

<space/>

<wd l="7574" t="9490" r="7848" b="9634">dev</wd>

<space/>

<wd l="7925" t="9528" r="8030" b="9629">+</wd>

<space/>

<wd l="8112" t="9490" r="8386" b="9634">dev</wd>

<space/>

<wd l="8467" t="9490" r="8971" b="9667">2015),</wd>

<space/>

<wd l="9053" t="9490" r="9307" b="9634">but</wd>

<space/>

<wd l="9374" t="9490" r="9864" b="9634">risked</wd>

<space/>

<wd l="9936" t="9490" r="10507" b="9634">overfit-</wd>

</ln>

<ln l="6149" t="9730" r="10507" b="9917" baseLine="9869" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="9730" r="6456" b="9917">ting</wd>

<space/>

<wd l="6518" t="9749" r="6672" b="9874">to</wd>

<space/>

<wd l="6734" t="9730" r="6883" b="9874">it.</wd>

<space/>

<wd l="6989" t="9730" r="8064" b="9917">Alternatively,</wd>

<space/>

<wd l="8136" t="9773" r="8366" b="9874">we</wd>

<space/>

<wd l="8434" t="9730" r="8870" b="9874">could</wd>

<space/>

<wd l="8938" t="9730" r="9485" b="9874">choose</wd>

<space/>

<wd l="9552" t="9773" r="9638" b="9874">a</wd>

<space/>

<wd l="9701" t="9730" r="10507" b="9917">configura-</wd>

</ln>

<ln l="6149" t="9970" r="10517" b="10157" baseLine="10104" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="9970" r="6461" b="10114">tion</wd>

<space/>

<wd l="6523" t="9970" r="6826" b="10114">that</wd>

<space/>

<wd l="6893" t="9970" r="7056" b="10114">fit</wd>

<space/>

<wd l="7118" t="9970" r="7411" b="10114">less</wd>

<space/>

<wd l="7483" t="9970" r="7867" b="10138">well,</wd>

<space/>

<wd l="7944" t="9970" r="8098" b="10109">in</wd>

<space/>

<wd l="8170" t="9970" r="8582" b="10114">order</wd>

<space/>

<wd l="8650" t="9989" r="8798" b="10114">to</wd>

<space/>

<wd l="8870" t="9970" r="9302" b="10114">avoid</wd>

<space/>

<wd l="9370" t="9970" r="10229" b="10157">overfitting.</wd>

<space/>

<wd l="10349" t="9974" r="10517" b="10109">In</wd>

<space/>

</ln>

<ln l="6149" t="10210" r="10507" b="10392" baseLine="10344" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="10210" r="6389" b="10354">the</wd>

<space/>

<wd l="6461" t="10210" r="6782" b="10378">end,</wd>

<space/>

<wd l="6854" t="10253" r="7085" b="10354">we</wd>

<space/>

<wd l="7157" t="10210" r="7771" b="10354">decided</wd>

<space/>

<wd l="7834" t="10229" r="7982" b="10354">to</wd>

<space/>

<wd l="8054" t="10210" r="8491" b="10392">adopt</wd>

<space/>

<wd l="8554" t="10210" r="8794" b="10354">the</wd>

<space/>

<wd l="8861" t="10210" r="9326" b="10354">above</wd>

<space/>

<wd l="9389" t="10210" r="10200" b="10392">principled</wd>

<space/>

<wd l="10262" t="10253" r="10507" b="10392">ap-</wd>

</ln>

<ln l="6149" t="10450" r="10507" b="10637" baseLine="10584" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="10450" r="6734" b="10632">proach,</wd>

<space/>

<wd l="6811" t="10450" r="7555" b="10637">assuming</wd>

<space/>

<wd l="7622" t="10450" r="7925" b="10594">that</wd>

<space/>

<wd l="7992" t="10450" r="8338" b="10594">final</wd>

<space/>

<wd l="8410" t="10450" r="8741" b="10594">data</wd>

<space/>

<wd l="8803" t="10450" r="9298" b="10594">would</wd>

<space/>

<wd l="9360" t="10450" r="9547" b="10594">be</wd>

<space/>

<wd l="9614" t="10450" r="10003" b="10594">from</wd>

<space/>

<wd l="10070" t="10450" r="10507" b="10618">2015,</wd>

<space/>

</ln>

<ln l="6154" t="10685" r="10517" b="10872" baseLine="10824" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6154" t="10685" r="6437" b="10829">and</wd>

<space/>

<wd l="6490" t="10685" r="7210" b="10829">therefore</wd>

<space/>

<wd l="7272" t="10685" r="8563" b="10872">down-weighting</wd>

<space/>

<wd l="8621" t="10685" r="9240" b="10872">training</wd>

<space/>

<wd l="9302" t="10685" r="9634" b="10829">data</wd>

<space/>

<wd l="9682" t="10685" r="10075" b="10829">from</wd>

<space/>

<wd l="10128" t="10685" r="10517" b="10867">prior</wd>

<space/>

</ln>

<ln l="6149" t="10925" r="10512" b="11112" baseLine="11064" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="10968" r="6605" b="11112">years.</wd>

<space/>

<wd l="6749" t="10930" r="7008" b="11069">We</wd>

<space/>

<wd l="7090" t="10925" r="7402" b="11069">also</wd>

<space/>

<wd l="7483" t="10925" r="8232" b="11069">evaluated</wd>

<space/>

<wd l="8304" t="10925" r="8544" b="11069">the</wd>

<space/>

<wd l="8630" t="10944" r="9178" b="11112">system</wd>

<space/>

<wd l="9245" t="10925" r="9686" b="11069">while</wd>

<space/>

<wd l="9763" t="10925" r="10512" b="11112">including</wd>

<space/>

</ln>

<ln l="6149" t="11165" r="10512" b="11352" baseLine="11304" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="11165" r="6389" b="11309">the</wd>

<space/>

<wd l="6461" t="11165" r="6734" b="11309">dev</wd>

<space/>

<wd l="6816" t="11165" r="7200" b="11309">2015</wd>

<space/>

<wd l="7282" t="11165" r="7608" b="11309">data</wd>

<space/>

<wd l="7670" t="11165" r="7829" b="11304">in</wd>

<space/>

<wd l="7891" t="11165" r="8136" b="11309">the</wd>

<space/>

<wd l="8203" t="11165" r="8818" b="11352">training</wd>

<space/>

<wd l="8894" t="11184" r="9154" b="11333">set,</wd>

<space/>

<wd l="9226" t="11184" r="9379" b="11309">to</wd>

<space/>

<wd l="9456" t="11208" r="9701" b="11309">see</wd>

<space/>

<wd l="9768" t="11165" r="10099" b="11309">how</wd>

<space/>

<wd l="10171" t="11165" r="10512" b="11309">well</wd>

<space/>

</ln>

<ln l="6149" t="11405" r="7661" b="11549" baseLine="11539" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="11448" r="6379" b="11549">we</wd>

<space/>

<wd l="6432" t="11405" r="6926" b="11549">would</wd>

<space/>

<wd l="6974" t="11405" r="7464" b="11549">match</wd>

<space/>

<wd l="7512" t="11405" r="7661" b="11549">it.</wd>

</ln>

</para>

<para l="6149" t="11827" r="7363" b="12370" alignment="left" ri="3168" spaceBefore="93" lsp="exactly" lspExact="365" language="en">

<ln l="6154" t="11827" r="7363" b="12048" baseLine="11995" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6154" t="11832" r="6264" b="12000">5</wd>

<space/>

<wd l="6509" t="11827" r="7363" b="12048">Analysis</wd>

<space/>

</ln>

<ln l="6149" t="12226" r="7325" b="12370" baseLine="12360" bold="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="12226" r="6389" b="12370">5.1</wd>

<space/>

<wd l="6595" t="12226" r="7325" b="12370">Features</wd>

</ln>

</para>

<para l="6149" t="12538" r="10517" b="15355" alignment="justified" spaceBefore="62" lsp="exactly" lspExact="239" language="en">

<ln l="6149" t="12538" r="10507" b="12725" baseLine="12672" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="12542" r="6317" b="12677">In</wd>

<space/>

<wd l="6432" t="12557" r="6869" b="12682">terms</wd>

<space/>

<wd l="6998" t="12538" r="7171" b="12682">of</wd>

<space/>

<wd l="7277" t="12538" r="7954" b="12706">features,</wd>

<space/>

<wd l="8093" t="12581" r="8323" b="12682">we</wd>

<space/>

<wd l="8443" t="12538" r="8986" b="12682">looked</wd>

<space/>

<wd l="9110" t="12557" r="9250" b="12682">at</wd>

<space/>

<wd l="9365" t="12538" r="9610" b="12682">the</wd>

<space/>

<wd l="9739" t="12557" r="10507" b="12725">strongest-</wd>

</ln>

<ln l="6149" t="12773" r="10512" b="12960" baseLine="12912" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="12773" r="6878" b="12960">weighted</wd>

<space/>

<wd l="6974" t="12773" r="7968" b="12917">observations</wd>

<space/>

<wd l="8064" t="12773" r="8222" b="12912">in</wd>

<space/>

<wd l="8314" t="12773" r="8554" b="12917">the</wd>

<space/>

<wd l="8650" t="12792" r="9264" b="12960">notypes</wd>

<space/>

<wd l="9360" t="12773" r="9902" b="12941">model,</wd>

<space/>

<wd l="10013" t="12792" r="10162" b="12917">to</wd>

<space/>

<wd l="10267" t="12816" r="10512" b="12917">see</wd>

<space/>

</ln>

<ln l="6149" t="13013" r="10517" b="13200" baseLine="13152" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="13013" r="6538" b="13157">what</wd>

<space/>

<wd l="6624" t="13013" r="6864" b="13157">the</wd>

<space/>

<wd l="6955" t="13013" r="7536" b="13200">general</wd>

<space/>

<wd l="7627" t="13013" r="8405" b="13157">indicators</wd>

<space/>

<wd l="8501" t="13056" r="8736" b="13157">are</wd>

<space/>

<wd l="8832" t="13013" r="9005" b="13157">of</wd>

<space/>

<wd l="9077" t="13013" r="9610" b="13157">named</wd>

<space/>

<wd l="9701" t="13013" r="10267" b="13157">entities</wd>

<space/>

<wd l="10358" t="13013" r="10517" b="13152">in</wd>

<space/>

</ln>

<ln l="6149" t="13253" r="10502" b="13440" baseLine="13387" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="13272" r="6696" b="13397">tweets.</wd>

<space/>

<wd l="6888" t="13253" r="7195" b="13397">The</wd>

<space/>

<wd l="7286" t="13253" r="7814" b="13440">largest</wd>

<space/>

<wd l="7906" t="13253" r="8078" b="13397">of</wd>

<space/>

<wd l="8160" t="13253" r="8563" b="13397">these</wd>

<space/>

<wd l="8659" t="13296" r="8899" b="13397">are</wd>

<space/>

<wd l="9000" t="13253" r="9504" b="13397">shown</wd>

<space/>

<wd l="9595" t="13253" r="9749" b="13392">in</wd>

<space/>

<wd l="9835" t="13253" r="10272" b="13397">Table</wd>

<space/>

<wd l="10368" t="13253" r="10502" b="13397">3.</wd>

<space/>

</ln>

<ln l="6154" t="13493" r="10517" b="13680" baseLine="13627" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6154" t="13493" r="6370" b="13637">Of</wd>

<space/>

<wd l="6413" t="13512" r="6758" b="13637">note</wd>

<space/>

<wd l="6816" t="13493" r="6941" b="13637">is</wd>

<space/>

<wd l="7003" t="13493" r="7306" b="13637">that</wd>

<space/>

<wd l="7358" t="13493" r="7982" b="13637">features</wd>

<space/>

<wd l="8045" t="13493" r="8842" b="13680">indicating</wd>

<space/>

<wd l="8899" t="13498" r="9418" b="13661">URLs,</wd>

<space/>

<wd l="9485" t="13493" r="10166" b="13680">hashtags</wd>

<space/>

<wd l="10229" t="13493" r="10517" b="13637">and</wd>

<space/>

</ln>

<ln l="6149" t="13733" r="10507" b="13920" baseLine="13867" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="13776" r="6984" b="13877">usernames</wd>

<space/>

<wd l="7046" t="13733" r="7675" b="13877">indicate</wd>

<space/>

<wd l="7742" t="13733" r="8304" b="13920">against</wd>

<space/>

<wd l="8366" t="13776" r="8544" b="13877">an</wd>

<space/>

<wd l="8611" t="13733" r="9110" b="13920">entity;</wd>

<space/>

<wd l="9178" t="13733" r="9970" b="13877">lowercase</wd>

<space/>

<wd l="10027" t="13733" r="10507" b="13877">words</wd>

<space/>

</ln>

<ln l="6149" t="13973" r="10507" b="14160" baseLine="14107" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="13973" r="6898" b="14160">including</wd>

<space/>

<wd l="6946" t="13973" r="7930" b="14155">punctuation,</wd>

<space/>

<wd l="7982" t="14016" r="8146" b="14117">or</wd>

<space/>

<wd l="8194" t="13973" r="9082" b="14160">comprising</wd>

<space/>

<wd l="9130" t="13973" r="9475" b="14160">only</wd>

<space/>

<wd l="9523" t="13973" r="10507" b="14155">punctuation,</wd>

<space/>

</ln>

<ln l="6154" t="14208" r="10512" b="14395" baseLine="14347">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6154" t="14251" r="6389" b="14352">are</wd>

<space/>

<wd l="6451" t="14227" r="6706" b="14352">not</wd>

<space/>

<wd l="6768" t="14208" r="7387" b="14376">entities;</wd>

<space/>

<wd l="7454" t="14208" r="7896" b="14395">being</wd>

<space/>

<wd l="7958" t="14208" r="8779" b="14390">proceeded</wd>

<space/>

<wd l="8832" t="14208" r="9029" b="14395">by</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="9091" t="14237" r="9254" b="14352">at</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="9302" t="14208" r="10008" b="14352">indicates</wd>

<space/>

<wd l="10070" t="14208" r="10512" b="14395">being</wd>

<space/>

</run>

</ln>

<ln l="6149" t="14448" r="10512" b="14635" baseLine="14587" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="14448" r="6302" b="14587">in</wd>

<space/>

<wd l="6379" t="14491" r="6562" b="14592">an</wd>

<space/>

<wd l="6643" t="14448" r="7090" b="14635">entity</wd>

<space/>

<wd l="7171" t="14448" r="7526" b="14626">(+ve</wd>

<space/>

<wd l="7598" t="14453" r="7718" b="14587">B</wd>

<space/>

<wd l="7805" t="14448" r="8352" b="14635">weight</wd>

<space/>

<wd l="8424" t="14448" r="8712" b="14592">and</wd>

<space/>

<wd l="8789" t="14491" r="9029" b="14592">-ve</wd>

<space/>

<wd l="9110" t="14448" r="9245" b="14592">O</wd>

<space/>

<wd l="9322" t="14448" r="9979" b="14635">weight);</wd>

<space/>

<wd l="10070" t="14448" r="10512" b="14635">being</wd>

<space/>

</ln>

<ln l="6149" t="14688" r="10517" b="14875" baseLine="14822">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6149" t="14688" r="6869" b="14870">preceded</wd>

<space/>

<wd l="6941" t="14688" r="7133" b="14875">by</wd>

<space/>

</run>

<wd l="7214" t="14688" r="7406" b="14870"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">of</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="7493" t="14688" r="7795" b="14832">and</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="7867" t="14731" r="8030" b="14832">or</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="8102" t="14688" r="8438" b="14832">with</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="8525" t="14707" r="9187" b="14875">suggests</wd>

<space/>

<wd l="9269" t="14731" r="9446" b="14832">an</wd>

<space/>

<wd l="9528" t="14688" r="10027" b="14875">entity;</wd>

<space/>

<wd l="10123" t="14688" r="10517" b="14832">short</wd>

<space/>

</run>

</ln>

<ln l="6149" t="14928" r="10507" b="15115" baseLine="15062" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="14928" r="6629" b="15072">words</wd>

<space/>

<wd l="6706" t="14928" r="6994" b="15072">and</wd>

<space/>

<wd l="7061" t="14928" r="8290" b="15115">hashtag-shaped</wd>

<space/>

<wd l="8357" t="14928" r="8837" b="15072">words</wd>

<space/>

<wd l="8918" t="14971" r="9154" b="15072">are</wd>

<space/>

<wd l="9226" t="14947" r="9480" b="15072">not</wd>

<space/>

<wd l="9552" t="14928" r="10171" b="15096">entities;</wd>

<space/>

<wd l="10258" t="14928" r="10507" b="15072">be-</wd>

</ln>

<ln l="6149" t="15168" r="10507" b="15355" baseLine="15302">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6149" t="15168" r="6398" b="15355">ing</wd>

<space/>

<wd l="6446" t="15168" r="7152" b="15312">followed</wd>

<space/>

<wd l="7195" t="15168" r="7392" b="15355">by</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="7445" t="15168" r="8011" b="15350">tonight</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="8054" t="15187" r="8717" b="15355">suggests</wd>

<space/>

<wd l="8765" t="15168" r="9206" b="15355">being</wd>

<space/>

<wd l="9254" t="15168" r="9725" b="15312">inside</wd>

<space/>

<wd l="9778" t="15211" r="9955" b="15312">an</wd>

<space/>

<wd l="10008" t="15168" r="10507" b="15355">entity;</wd>

</run>

</ln>

</para>

</column>

</section>

<dd l="1438" t="15736" r="10529" b="15977">

<para l="5804" t="15787" r="6148" b="15946" alignment="centered" spaceBefore="4" lsp="exactly" lspExact="229" language="en">

<ln l="5870" t="15787" r="6082" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="41">

<wd l="5870" t="15787" r="6082" b="15946">50</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4306.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1440" marginTop="1273" marginRight="1354" marginBottom="1292" offsetX="-42" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1440" t="1273" r="10555" b="15363">

<column l="1440" t="1273" r="6038" b="15363">

<rulerline l="1440" t="1306" r="6038" b="1306" type="single" width="10" color="000000"/>

<para l="1642" t="1325" r="4685" b="1488" alignment="left" li="144" spaceBefore="12" spaceAfter="1" lsp="exactly" lspExact="211" language="en">

<tabs position="1642"/>

<ln l="1642" t="1325" r="4685" b="1488" baseLine="1445" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1642" t="1325" r="2299" b="1454">Features</wd>

<tab position="2299"/>

<wd l="3653" t="1325" r="4090" b="1454">Label</wd>

<space/>

<wd l="4138" t="1325" r="4685" b="1488">Weight</wd>

</ln>

</para>

<rulerline l="1440" t="1512" r="6038" b="1512" type="single" width="10" color="000000"/>

<para l="1642" t="1531" r="5808" b="1699" alignment="justified" li="144" spaceBefore="4" lsp="exactly" lspExact="198" language="en">

<tabs position="1642"/>

<tabs alignment="decimal" position="1699" leaderChar=" "/>

<ln l="1642" t="1531" r="5808" b="1699" baseLine="1656" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="1531" r="2179" b="1699">pref=@</wd>

<tab position="2179"/>

<wd l="4109" t="1531" r="4229" b="1661">O</wd>

<tab position="4229"/>

<wd l="5146" t="1531" r="5808" b="1661">3.368445</wd>

</ln>

</para>

<para l="1642" t="1733" r="5813" b="1901" alignment="justified" li="144" lsp="exactly" lspExact="198" language="en">

<tabs position="1642"/>

<tabs alignment="decimal" position="1901" leaderChar=" "/>

<ln l="1642" t="1733" r="5813" b="1901" baseLine="1853" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="1733" r="2222" b="1901">pref=htt</wd>

<tab position="2222"/>

<wd l="4109" t="1733" r="4229" b="1862">O</wd>

<tab position="4229"/>

<wd l="5146" t="1733" r="5813" b="1862">2.049354</wd>

</ln>

</para>

<para l="1642" t="1930" r="5813" b="2098" alignment="justified" li="144" spaceBefore="2" lsp="exactly" lspExact="198" language="en">

<tabs position="1642"/>

<tabs alignment="decimal" position="2098" leaderChar=" "/>

<ln l="1642" t="1930" r="5813" b="2098" baseLine="2054" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="1930" r="2122" b="2098">pref=#</wd>

<tab position="2122"/>

<wd l="4109" t="1930" r="4229" b="2059">O</wd>

<tab position="4229"/>

<wd l="5160" t="1930" r="5813" b="2059">1.979034</wd>

</ln>

</para>

<para l="1646" t="2131" r="5808" b="2299" alignment="justified" li="144" lsp="exactly" lspExact="198" language="en">

<tabs position="1646"/>

<tabs alignment="decimal" position="2299" leaderChar=" "/>

<ln l="1646" t="2131" r="5808" b="2299" baseLine="2251" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="2131" r="2688" b="2299">shapeshort-x.x</wd>

<tab position="2688"/>

<wd l="4109" t="2131" r="4229" b="2261">O</wd>

<tab position="4229"/>

<wd l="5160" t="2131" r="5808" b="2261">1.688033</wd>

</ln>

</para>

<para l="1646" t="2328" r="5813" b="2496" alignment="justified" li="144" spaceBefore="3" lsp="exactly" lspExact="198" language="en">

<tabs position="1646"/>

<tabs alignment="decimal" position="2496" leaderChar=" "/>

<ln l="1646" t="2328" r="5813" b="2496" baseLine="2453" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="2328" r="2501" b="2496">shapeshort-.</wd>

<tab position="2501"/>

<wd l="4109" t="2328" r="4229" b="2458">O</wd>

<tab position="4229"/>

<wd l="5160" t="2328" r="5813" b="2458">1.552530</wd>

</ln>

</para>

<para l="1642" t="2530" r="5813" b="2683" alignment="justified" li="144" lsp="exactly" lspExact="198" language="en">

<tabs position="1642"/>

<tabs alignment="decimal" position="2683" leaderChar=" "/>

<ln l="1642" t="2530" r="5813" b="2683" baseLine="2650" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="2530" r="2275" b="2683">w[-1]=at</wd>

<tab position="2275"/>

<wd l="4109" t="2534" r="4219" b="2654">B</wd>

<tab position="4219"/>

<wd l="5160" t="2530" r="5813" b="2659">1.519609</wd>

</ln>

</para>

<para l="1642" t="2726" r="5798" b="2894" alignment="justified" li="144" spaceBefore="2" lsp="exactly" lspExact="198" language="en">

<tabs position="1642"/>

<tabs alignment="decimal" position="2894" leaderChar=" "/>

<ln l="1642" t="2726" r="5798" b="2894" baseLine="2851" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="2726" r="3240" b="2894">p14x11110011111011</wd>

<tab position="3240"/>

<wd l="4109" t="2726" r="4229" b="2856">O</wd>

<tab position="4229"/>

<wd l="5160" t="2726" r="5798" b="2856">1.326481</wd>

</ln>

</para>

<para l="1642" t="2928" r="5813" b="3082" alignment="justified" li="144" lsp="exactly" lspExact="198" language="en">

<tabs position="1642"/>

<tabs alignment="decimal" position="3082" leaderChar=" "/>

<ln l="1642" t="2928" r="5813" b="3082" baseLine="3048" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="2928" r="2275" b="3082">w[-1]=at</wd>

<tab position="2275"/>

<wd l="4109" t="2928" r="4229" b="3058">O</wd>

<tab position="4229"/>

<wd l="5088" t="2928" r="5813" b="3058">-1.285570</wd>

</ln>

</para>

<para l="1642" t="3125" r="5813" b="3278" alignment="justified" li="144" spaceBefore="3" lsp="exactly" lspExact="197" language="en">

<tabs position="1642"/>

<tabs alignment="decimal" position="3278" leaderChar=" "/>

<ln l="1642" t="3125" r="5813" b="3278" baseLine="3250" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="3125" r="2304" b="3278">w[-1]=of</wd>

<tab position="2304"/>

<wd l="4109" t="3125" r="4229" b="3254">O</wd>

<tab position="4229"/>

<wd l="5088" t="3125" r="5813" b="3254">-1.244912</wd>

</ln>

</para>

<para l="1642" t="3326" r="5808" b="3494" alignment="justified" li="144" lsp="exactly" lspExact="198" language="en">

<tabs position="1642"/>

<tabs alignment="decimal" position="3494" leaderChar=" "/>

<ln l="1642" t="3326" r="5808" b="3494" baseLine="3446" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="3326" r="2237" b="3494">length-2</wd>

<tab position="2237"/>

<wd l="4109" t="3326" r="4229" b="3456">O</wd>

<tab position="4229"/>

<wd l="5160" t="3326" r="5808" b="3456">1.196777</wd>

</ln>

</para>

<para l="1642" t="3523" r="5808" b="3691" alignment="justified" li="144" spaceBefore="3" lsp="exactly" lspExact="198" language="en">

<tabs position="1642"/>

<tabs alignment="decimal" position="3691" leaderChar=" "/>

<ln l="1642" t="3523" r="5808" b="3691" baseLine="3648" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="3523" r="2232" b="3691">length-3</wd>

<tab position="2232"/>

<wd l="4109" t="3523" r="4229" b="3653">O</wd>

<tab position="4229"/>

<wd l="5160" t="3523" r="5808" b="3653">1.177138</wd>

</ln>

</para>

<para l="1646" t="3725" r="5808" b="3893" alignment="justified" li="144" lsp="exactly" lspExact="198" language="en">

<tabs position="1646"/>

<tabs alignment="decimal" position="3893" leaderChar=" "/>

<ln l="1646" t="3725" r="5808" b="3893" baseLine="3845" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="3725" r="2592" b="3893">shapeshort-x.</wd>

<tab position="2592"/>

<wd l="4109" t="3725" r="4229" b="3854">O</wd>

<tab position="4229"/>

<wd l="5160" t="3725" r="5808" b="3854">1.172663</wd>

</ln>

</para>

<para l="1642" t="3922" r="2832" b="4090" alignment="justified" li="144" spaceBefore="2" lsp="exactly" lspExact="198" language="en">

<ln l="1642" t="3922" r="2832" b="4090" baseLine="4046" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1642" t="3922" r="1781" b="4046">in</wd>

<space/>

<wd l="1853" t="3922" r="2832" b="4090">gaz=Freebase</wd>

</ln>

</para>

<para l="1642" t="4123" r="5808" b="4291" alignment="justified" li="144" lsp="exactly" lspExact="198" language="en">

<tabs position="1642"/>

<tabs alignment="decimal" position="4291" leaderChar=" "/>

<ln l="1642" t="4123" r="5808" b="4291" baseLine="4243" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="4123" r="3043" b="4291">videogameplatform</wd>

<tab position="3043"/>

<wd l="4109" t="4123" r="4229" b="4253">O</wd>

<tab position="4229"/>

<wd l="5088" t="4123" r="5808" b="4253">-1.152093</wd>

</ln>

</para>

<para l="1642" t="4320" r="5808" b="4474" alignment="justified" li="144" spaceBefore="3" lsp="exactly" lspExact="198" language="en">

<tabs position="1642"/>

<tabs alignment="decimal" position="4474" leaderChar=" "/>

<ln l="1642" t="4320" r="5808" b="4474" baseLine="4445" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="4320" r="2400" b="4474">w[-1]=and</wd>

<tab position="2400"/>

<wd l="4109" t="4320" r="4229" b="4450">O</wd>

<tab position="4229"/>

<wd l="5088" t="4320" r="5808" b="4450">-1.143885</wd>

</ln>

</para>

<para l="1642" t="4522" r="5808" b="4690" alignment="justified" li="144" lsp="exactly" lspExact="198" language="en">

<tabs position="1642"/>

<tabs alignment="decimal" position="4690" leaderChar=" "/>

<ln l="1642" t="4522" r="5808" b="4690" baseLine="4642" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="4522" r="2222" b="4690">length-1</wd>

<tab position="2222"/>

<wd l="4109" t="4526" r="4219" b="4646">B</wd>

<tab position="4219"/>

<wd l="5088" t="4522" r="5808" b="4651">-1.132128</wd>

</ln>

</para>

<para l="1646" t="4718" r="5798" b="4886" alignment="justified" li="144" spaceBefore="2" lsp="exactly" lspExact="198" language="en">

<tabs position="1646"/>

<tabs alignment="decimal" position="4886" leaderChar=" "/>

<ln l="1646" t="4718" r="5798" b="4886" baseLine="4843" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="4718" r="2683" b="4886">shapeshort-Xx</wd>

<tab position="2683"/>

<wd l="4109" t="4718" r="4229" b="4848">O</wd>

<tab position="4229"/>

<wd l="5088" t="4718" r="5798" b="4848">-1.128341</wd>

</ln>

</para>

<para l="1642" t="4920" r="5813" b="5074" alignment="justified" li="144" lsp="exactly" lspExact="198" language="en">

<tabs position="1642"/>

<tabs alignment="decimal" position="5074" leaderChar=" "/>

<ln l="1642" t="4920" r="5813" b="5074" baseLine="5040" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="4920" r="2462" b="5074">w[-1]=with</wd>

<tab position="2462"/>

<wd l="4109" t="4920" r="4229" b="5050">O</wd>

<tab position="4229"/>

<wd l="5088" t="4920" r="5813" b="5050">-1.093224</wd>

</ln>

</para>

<para l="1642" t="5117" r="5813" b="5285" alignment="justified" li="144" spaceBefore="3" lsp="exactly" lspExact="197" language="en">

<tabs position="1642"/>

<tabs alignment="decimal" position="5285" leaderChar=" "/>

<ln l="1642" t="5117" r="5813" b="5285" baseLine="5242" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="5117" r="2592" b="5285">w[1]=tonight</wd>

<tab position="2592"/>

<wd l="4109" t="5117" r="4229" b="5246">O</wd>

<tab position="4229"/>

<wd l="5088" t="5117" r="5813" b="5246">-1.077982</wd>

</ln>

</para>

<para l="1646" t="5318" r="5813" b="5486" alignment="justified" li="144" lsp="exactly" lspExact="198" language="en">

<tabs position="1646"/>

<tabs alignment="decimal" position="5486" leaderChar=" "/>

<ln l="1646" t="5318" r="5813" b="5486" baseLine="5438" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="5318" r="2554" b="5486">shapeshort-0</wd>

<tab position="2554"/>

<wd l="4109" t="5323" r="4219" b="5443">B</wd>

<tab position="4219"/>

<wd l="5088" t="5318" r="5813" b="5448">-1.051406</wd>

</ln>

</para>

<para l="1536" t="5726" r="5712" b="5914" alignment="justified" li="144" spaceBefore="183" spaceAfter="306" lsp="exactly" lspExact="240" language="en">

<ln l="1536" t="5726" r="5712" b="5914" baseLine="5861" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1536" t="5726" r="1973" b="5870">Table</wd>

<space/>

<wd l="2026" t="5726" r="2160" b="5870">3:</wd>

<space/>

<wd l="2237" t="5731" r="2837" b="5914">Largest</wd>

<space/>

<wd l="2885" t="5726" r="3614" b="5914">weighted</wd>

<space/>

<wd l="3662" t="5726" r="4286" b="5870">features</wd>

<space/>

<wd l="4344" t="5726" r="4498" b="5866">in</wd>

<space/>

<wd l="4550" t="5746" r="5165" b="5914">notypes</wd>

<space/>

<wd l="5218" t="5726" r="5712" b="5870">model</wd>

</ln>

</para>

<rulerline l="1440" t="6235" r="6038" b="6235" type="single" width="10" color="000000"/>

<para l="1642" t="6254" r="5832" b="6418" alignment="left" li="144" lsp="exactly" lspExact="196" language="en">

<tabs position="1642"/>

<tabs alignment="right" position="6418" leaderChar=" "/>

<ln l="1642" t="6254" r="5832" b="6418" baseLine="6374" bold="true" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="6254" r="2299" b="6384">Features</wd>

<tab position="2299"/>

<wd l="3677" t="6254" r="4114" b="6384">Label</wd>

<space/>

<wd l="4157" t="6254" r="4709" b="6418">Weight</wd>

<tab position="4709"/>

<wd l="5357" t="6254" r="5832" b="6384">Terms</wd>

</ln>

</para>

<para l="1642" t="6461" r="5827" b="6629" alignment="left" li="144" spaceBefore="10" lsp="exactly" lspExact="199" language="en">

<tabs position="1642"/>

<tabs alignment="right" position="6629" leaderChar=" "/>

<ln l="1642" t="6461" r="5827" b="6629" baseLine="6581" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="6499" r="1954" b="6629">prev</wd>

<space/>

<wd l="2021" t="6461" r="2539" b="6629">p3x011</wd>

<tab position="2539"/>

<wd l="3830" t="6461" r="4546" b="6629">B-geo-loc</wd>

<tab position="4546"/>

<wd l="5107" t="6461" r="5827" b="6590">-0.571505</wd>

</ln>

</para>

<para l="1642" t="6658" r="5832" b="6826" alignment="left" li="144" spaceBefore="2" lsp="exactly" lspExact="198" language="en">

<tabs position="1642"/>

<tabs alignment="right" position="6826" leaderChar=" "/>

<ln l="1642" t="6658" r="5832" b="6826" baseLine="6782" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="6658" r="3240" b="6826">p14x11110011111001</wd>

<tab position="3240"/>

<wd l="3917" t="6658" r="4464" b="6787">B-other</wd>

<tab position="4464"/>

<wd l="5107" t="6658" r="5832" b="6787">-0.585369</wd>

</ln>

</para>

<para l="1642" t="6859" r="5832" b="7027" alignment="left" li="144" lsp="exactly" lspExact="198" language="en">

<tabs position="1642"/>

<tabs alignment="right" position="7027" leaderChar=" "/>

<ln l="1642" t="6859" r="5832" b="7027" baseLine="6979" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="6898" r="1954" b="7027">prev</wd>

<space/>

<wd l="2021" t="6859" r="2822" b="7027">p6x111100</wd>

<tab position="2822"/>

<wd l="3773" t="6864" r="4603" b="7027">B-company</wd>

<tab position="4603"/>

<wd l="5107" t="6859" r="5832" b="6989">-0.604976</wd>

</ln>

</para>

<para l="1642" t="7056" r="5832" b="7224" alignment="left" li="144" spaceBefore="3" lsp="exactly" lspExact="198" language="en">

<tabs position="1642"/>

<tabs alignment="right" position="7224" leaderChar=" "/>

<ln l="1642" t="7056" r="5832" b="7224" baseLine="7181" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="7056" r="3072" b="7224">p12x111100111110</wd>

<tab position="3072"/>

<wd l="3830" t="7056" r="4546" b="7224">B-geo-loc</wd>

<tab position="4546"/>

<wd l="5107" t="7056" r="5832" b="7186">-0.620909</wd>

</ln>

</para>

<para l="1642" t="7258" r="5832" b="7426" alignment="left" li="144" lsp="exactly" lspExact="198" language="en">

<tabs position="1642"/>

<tabs alignment="right" position="7426" leaderChar=" "/>

<ln l="1642" t="7258" r="5832" b="7426" baseLine="7378" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="7296" r="1954" b="7426">prev</wd>

<space/>

<wd l="2021" t="7258" r="2645" b="7426">p4x0100</wd>

<tab position="2645"/>

<wd l="3859" t="7262" r="4522" b="7426">B-person</wd>

<tab position="4522"/>

<wd l="5107" t="7258" r="5832" b="7387">-0.655420</wd>

</ln>

</para>

<para l="1642" t="7454" r="5818" b="7622" alignment="left" li="144" spaceBefore="2" lsp="exactly" lspExact="198" language="en">

<tabs position="1642"/>

<tabs alignment="right" position="7622" leaderChar=" "/>

<ln l="1642" t="7454" r="5818" b="7622" baseLine="7579" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="7493" r="1954" b="7622">prev</wd>

<space/>

<wd l="2021" t="7454" r="3274" b="7622">p18x0000111110</wd>

<tab position="3274"/>

<wd l="3845" t="7454" r="4531" b="7622">B-facility</wd>

<tab position="4531"/>

<wd l="5160" t="7454" r="5818" b="7584">0.699101</wd>

</ln>

</para>

<para l="1642" t="7656" r="5818" b="7824" alignment="left" li="144" lsp="exactly" lspExact="198" language="en">

<tabs position="1642"/>

<tabs alignment="right" position="7824" leaderChar=" "/>

<ln l="1642" t="7656" r="5818" b="7824" baseLine="7776" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="7694" r="1954" b="7824">prev</wd>

<space/>

<wd l="2021" t="7656" r="3274" b="7824">p20x0000111110</wd>

<tab position="3274"/>

<wd l="3845" t="7656" r="4531" b="7824">B-facility</wd>

<tab position="4531"/>

<wd l="5160" t="7656" r="5818" b="7786">0.699101</wd>

</ln>

</para>

<para l="1642" t="7853" r="5827" b="8021" alignment="left" li="144" spaceBefore="3" lsp="exactly" lspExact="197" language="en">

<tabs position="1642"/>

<tabs alignment="right" position="8021" leaderChar=" "/>

<ln l="1642" t="7853" r="5827" b="8021" baseLine="7978" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="7891" r="1954" b="8021">prev</wd>

<space/>

<wd l="2021" t="7853" r="2554" b="8021">p3x010</wd>

<tab position="2554"/>

<wd l="3710" t="7858" r="4666" b="8021">B-sportsteam</wd>

<tab position="4666"/>

<wd l="5160" t="7853" r="5827" b="7982">0.709865</wd>

</ln>

</para>

<para l="1642" t="8054" r="5827" b="8222" alignment="left" li="144" lsp="exactly" lspExact="198" language="en">

<tabs position="1642"/>

<tabs alignment="right" position="8222" leaderChar=" "/>

<ln l="1642" t="8054" r="5827" b="8222" baseLine="8174" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="8054" r="2894" b="8222">p10x0110011010</wd>

<tab position="2894"/>

<wd l="3845" t="8054" r="4531" b="8184">B-tvshow</wd>

<tab position="4531"/>

<wd l="5160" t="8054" r="5827" b="8184">0.714127</wd>

</ln>

</para>

<para l="1642" t="8251" r="5827" b="8419" alignment="left" li="144" spaceBefore="3" lsp="exactly" lspExact="198" language="en">

<tabs position="1642"/>

<tabs alignment="right" position="8419" leaderChar=" "/>

<ln l="1642" t="8251" r="5827" b="8419" baseLine="8376" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="8251" r="2165" b="8419">p3x011</wd>

<tab position="2165"/>

<wd l="3859" t="8256" r="4522" b="8419">B-person</wd>

<tab position="4522"/>

<wd l="5107" t="8251" r="5827" b="8381">-0.717037</wd>

</ln>

</para>

<para l="1642" t="8453" r="5832" b="8621" alignment="left" li="144" lsp="exactly" lspExact="198" language="en">

<tabs position="1642"/>

<tabs alignment="right" position="8621" leaderChar=" "/>

<ln l="1642" t="8453" r="5832" b="8621" baseLine="8573" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="8453" r="3240" b="8621">p14x11110011111001</wd>

<tab position="3240"/>

<wd l="3826" t="8453" r="4555" b="8621">B-product</wd>

<tab position="4555"/>

<wd l="5160" t="8453" r="5832" b="8582">0.747492</wd>

</ln>

</para>

<para l="1642" t="8650" r="5827" b="8818" alignment="left" li="144" spaceBefore="2" lsp="exactly" lspExact="198" language="en">

<tabs position="1642"/>

<tabs alignment="right" position="8818" leaderChar=" "/>

<ln l="1642" t="8650" r="5827" b="8818" baseLine="8774" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="8688" r="1954" b="8818">prev</wd>

<space/>

<wd l="2021" t="8650" r="3005" b="8818">p8x11110110</wd>

<tab position="3005"/>

<wd l="3917" t="8650" r="4464" b="8779">B-other</wd>

<tab position="4464"/>

<wd l="5160" t="8650" r="5827" b="8779">0.774895</wd>

</ln>

</para>

<para l="1642" t="8851" r="5827" b="9019" alignment="left" li="144" lsp="exactly" lspExact="198" language="en">

<tabs position="1642"/>

<tabs alignment="right" position="9019" leaderChar=" "/>

<ln l="1642" t="8851" r="5827" b="9019" baseLine="8971" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="8851" r="3254" b="9019">p14x11110011111100</wd>

<tab position="3254"/>

<wd l="3830" t="8851" r="4546" b="9019">B-geo-loc</wd>

<tab position="4546"/>

<wd l="5160" t="8851" r="5827" b="8981">0.804635</wd>

</ln>

</para>

<para l="1642" t="9048" r="5827" b="9216" alignment="left" li="144" spaceBefore="3" lsp="exactly" lspExact="198" language="en">

<tabs position="1642"/>

<tabs alignment="right" position="9216" leaderChar=" "/>

<ln l="1642" t="9048" r="5827" b="9216" baseLine="9173" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="9086" r="1954" b="9216">prev</wd>

<space/>

<wd l="2021" t="9048" r="2554" b="9216">p3x010</wd>

<tab position="2554"/>

<wd l="3859" t="9053" r="4522" b="9216">B-person</wd>

<tab position="4522"/>

<wd l="5107" t="9048" r="5827" b="9178">-0.894333</wd>

</ln>

</para>

<para l="1642" t="9250" r="5827" b="9418" alignment="left" li="144" lsp="exactly" lspExact="198" language="en">

<tabs position="1642"/>

<tabs alignment="right" position="9418" leaderChar=" "/>

<ln l="1642" t="9250" r="5827" b="9418" baseLine="9370" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="9250" r="3058" b="9418">p12x111100111111</wd>

<tab position="3058"/>

<wd l="3830" t="9250" r="4546" b="9418">B-geo-loc</wd>

<tab position="4546"/>

<wd l="5160" t="9250" r="5827" b="9379">0.895203</wd>

</ln>

</para>

<para l="1642" t="9446" r="5832" b="9614" alignment="left" li="144" spaceBefore="2" lsp="exactly" lspExact="198" language="en">

<tabs position="1642"/>

<tabs alignment="right" position="9614" leaderChar=" "/>

<ln l="1642" t="9446" r="5832" b="9614" baseLine="9571" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="9446" r="3254" b="9614">p14x11110011110110</wd>

<tab position="3254"/>

<wd l="3859" t="9451" r="4522" b="9614">B-person</wd>

<tab position="4522"/>

<wd l="5160" t="9446" r="5832" b="9576">0.950866</wd>

</ln>

</para>

<para l="1642" t="9648" r="5832" b="9816" alignment="left" li="144" lsp="exactly" lspExact="198" language="en">

<tabs position="1642"/>

<tabs alignment="right" position="9816" leaderChar=" "/>

<ln l="1642" t="9648" r="5832" b="9816" baseLine="9768" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="9648" r="3254" b="9816">p14x11110011111000</wd>

<tab position="3254"/>

<wd l="3773" t="9653" r="4603" b="9816">B-company</wd>

<tab position="4603"/>

<wd l="5179" t="9648" r="5832" b="9778">1.044984</wd>

</ln>

</para>

<para l="1445" t="10056" r="5808" b="10483" alignment="justified" ri="216" spaceBefore="182" lsp="exactly" lspExact="240" language="en">

<ln l="1445" t="10056" r="5808" b="10243" baseLine="10190" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="10056" r="1877" b="10200">Table</wd>

<space/>

<wd l="1963" t="10056" r="2098" b="10200">4:</wd>

<space/>

<wd l="2242" t="10056" r="3634" b="10243">Largest-weighted</wd>

<space/>

<wd l="3710" t="10061" r="4248" b="10200">Brown</wd>

<space/>

<wd l="4334" t="10056" r="4862" b="10200">cluster</wd>

<space/>

<wd l="4944" t="10056" r="5568" b="10200">features</wd>

<space/>

<wd l="5654" t="10056" r="5808" b="10195">in</wd>

<space/>

</ln>

<ln l="1464" t="10296" r="2501" b="10483" baseLine="10430" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1464" t="10296" r="2122" b="10483">10-types</wd>

<space/>

<wd l="2179" t="10296" r="2501" b="10440">task</wd>

</ln>

</para>

<para l="1445" t="11021" r="5803" b="11683" alignment="justified" ri="216" spaceBefore="484" lsp="exactly" lspExact="240" language="en">

<ln l="1445" t="11021" r="5803" b="11208" baseLine="11155" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="11021" r="2122" b="11165">numbers</wd>

<space/>

<wd l="2184" t="11021" r="2645" b="11208">rarely</wd>

<space/>

<wd l="2717" t="11040" r="3048" b="11165">start</wd>

<space/>

<wd l="3110" t="11021" r="3730" b="11189">entities;</wd>

<space/>

<wd l="3797" t="11021" r="4080" b="11165">and</wd>

<space/>

<wd l="4138" t="11021" r="4574" b="11208">being</wd>

<space/>

<wd l="4637" t="11021" r="5309" b="11165">matched</wd>

<space/>

<wd l="5366" t="11021" r="5563" b="11208">by</wd>

<space/>

<wd l="5626" t="11064" r="5803" b="11165">an</wd>

<space/>

</ln>

<ln l="1450" t="11256" r="5803" b="11443" baseLine="11395" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1450" t="11275" r="1848" b="11443">entry</wd>

<space/>

<wd l="1925" t="11256" r="2083" b="11395">in</wd>

<space/>

<wd l="2155" t="11256" r="2395" b="11400">the</wd>

<space/>

<wd l="2467" t="11256" r="2909" b="11400">video</wd>

<space/>

<wd l="2990" t="11299" r="3490" b="11443">games</wd>

<space/>

<wd l="3571" t="11275" r="4286" b="11443">gazetteer</wd>

<space/>

<wd l="4368" t="11275" r="5026" b="11443">suggests</wd>

<space/>

<wd l="5102" t="11256" r="5544" b="11443">being</wd>

<space/>

<wd l="5626" t="11299" r="5803" b="11400">an</wd>

<space/>

</ln>

<ln l="1450" t="11496" r="1920" b="11683" baseLine="11635" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1450" t="11496" r="1920" b="11683">entity.</wd>

</ln>

</para>

<para l="1450" t="11779" r="5803" b="12202" alignment="justified" ri="216" spaceBefore="39" fli="144" lsp="exactly" lspExact="240" language="en">

<ln l="1646" t="11779" r="5803" b="11966" baseLine="11914" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1646" t="11779" r="1973" b="11923">One</wd>

<space/>

<wd l="2026" t="11779" r="2554" b="11923">cluster</wd>

<space/>

<wd l="2602" t="11779" r="3062" b="11962">prefix</wd>

<space/>

<wd l="3110" t="11822" r="3413" b="11923">was</wd>

<space/>

<wd l="3466" t="11779" r="4243" b="11923">indicative</wd>

<space/>

<wd l="4296" t="11779" r="4469" b="11923">of</wd>

<space/>

<wd l="4507" t="11779" r="4944" b="11966">being</wd>

<space/>

<wd l="5002" t="11779" r="5573" b="11923">outside</wd>

<space/>

<wd l="5626" t="11822" r="5803" b="11923">an</wd>

<space/>

</ln>

<ln l="1450" t="12014" r="5803" b="12202" baseLine="12154" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1450" t="12014" r="1920" b="12202">entity.</wd>

<space/>

<wd l="2059" t="12014" r="2410" b="12158">This</wd>

<space/>

<wd l="2491" t="12014" r="3019" b="12158">cluster</wd>

<space/>

<wd l="3086" t="12014" r="3547" b="12197">prefix</wd>

<space/>

<wd l="3629" t="12014" r="4402" b="12158">contained</wd>

<space/>

<wd l="4469" t="12014" r="4805" b="12158">four</wd>

<space/>

<wd l="4882" t="12014" r="5803" b="12182">subclusters,</wd>

</ln>

</para>

<para l="1450" t="12254" r="5808" b="12682" alignment="justified" ri="216" spaceBefore="8" lsp="exactly" lspExact="235" language="en">

<ln l="1450" t="12254" r="5803" b="12442" baseLine="12394" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1450" t="12254" r="1810" b="12398">each</wd>

<space/>

<wd l="1901" t="12254" r="2736" b="12398">dominated</wd>

<space/>

<wd l="2822" t="12254" r="3019" b="12442">by</wd>

<space/>

<wd l="3110" t="12254" r="3322" b="12398">lot</wd>

<space/>

<wd l="3413" t="12254" r="3586" b="12398">of</wd>

<space/>

<wd l="3662" t="12254" r="5309" b="12442">frequently-occurring</wd>

<space/>

<wd l="5405" t="12254" r="5803" b="12398">dates</wd>

<space/>

</ln>

<ln l="1454" t="12494" r="5808" b="12682" baseLine="12629">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1454" t="12494" r="1786" b="12682">(e.g.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1963" t="12494" r="2813" b="12677">September</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2885" t="12494" r="3240" b="12638">with</wd>

<space/>

<wd l="3346" t="12494" r="3816" b="12638">12368</wd>

<space/>

<wd l="3907" t="12494" r="4632" b="12638">mentions</wd>

<space/>

<wd l="4723" t="12494" r="4877" b="12634">in</wd>

<space/>

<wd l="4958" t="12494" r="5203" b="12638">the</wd>

<space/>

<wd l="5299" t="12538" r="5808" b="12638">source</wd>

</run>

</ln>

</para>

<para l="1445" t="12734" r="5808" b="13637" alignment="justified" ri="216" lsp="exactly" lspExact="240" language="en">

<ln l="1450" t="12734" r="5808" b="12922" baseLine="12869">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1450" t="12734" r="1834" b="12912">data)</wd>

<space/>

<wd l="1915" t="12734" r="2198" b="12878">and</wd>

<space/>

<wd l="2261" t="12734" r="3293" b="12917">less-frequent</wd>

<space/>

<wd l="3365" t="12734" r="3686" b="12878">date</wd>

<space/>

<wd l="3768" t="12734" r="4459" b="12922">spellings</wd>

<space/>

<wd l="4531" t="12734" r="4829" b="12878">like</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="4906" t="12734" r="5808" b="12917">Wedneaday</wd>

<space/>

</run>

</ln>

<ln l="1450" t="12974" r="5798" b="13142" baseLine="13109">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1450" t="13018" r="1608" b="13118">or</wd>

<space/>

<wd l="1699" t="13018" r="2078" b="13118">rarer</wd>

<space/>

<wd l="2174" t="12974" r="2938" b="13118">occasions</wd>

<space/>

</run>

<wd l="3034" t="12979" r="3835" b="13142"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">Pentecost</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="3946" t="12974" r="4200" b="13118">but</wd>

<space/>

<wd l="4296" t="12974" r="4608" b="13118">also</wd>

<space/>

<wd l="4709" t="13018" r="4795" b="13118">a</wd>

<space/>

<wd l="4882" t="12974" r="5093" b="13118">lot</wd>

<space/>

<wd l="5189" t="12974" r="5362" b="13118">of</wd>

<space/>

<wd l="5443" t="12974" r="5798" b="13118">less-</wd>

</run>

</ln>

<ln l="1445" t="13210" r="5803" b="13397" baseLine="13349" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="13210" r="2107" b="13392">frequent</wd>

<space/>

<wd l="2160" t="13210" r="2582" b="13354">noise</wd>

<space/>

<wd l="2640" t="13210" r="3211" b="13378">entries,</wd>

<space/>

<wd l="3283" t="13253" r="3691" b="13354">some</wd>

<space/>

<wd l="3754" t="13210" r="3926" b="13354">of</wd>

<space/>

<wd l="3970" t="13210" r="4459" b="13354">which</wd>

<space/>

<wd l="4512" t="13253" r="4901" b="13354">were</wd>

<space/>

<wd l="4954" t="13210" r="5803" b="13397">potentially</wd>

<space/>

</ln>

<ln l="1445" t="13450" r="5808" b="13637" baseLine="13589">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1445" t="13450" r="1978" b="13594">named</wd>

<space/>

<wd l="2030" t="13450" r="2597" b="13594">entities</wd>

<space/>

<wd l="2659" t="13450" r="2995" b="13637">(e.g.</wd>

<space/>

</run>

<wd l="3072" t="13450" r="3614" b="13618"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">#ITV3</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

</run>

<wd l="3667" t="13454" r="4344" b="13632"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">Buggati</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

</run>

<wd l="4402" t="13450" r="4949" b="13632"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">Katja</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">).</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="5026" t="13450" r="5333" b="13594">The</wd>

<space/>

<wd l="5386" t="13450" r="5808" b="13594">noise</wd>

</run>

</ln>

</para>

<para l="1445" t="13690" r="5803" b="14117" alignment="justified" ri="216" spaceBefore="3" lsp="exactly" lspExact="235" language="en">

<ln l="1445" t="13690" r="5803" b="13877" baseLine="13829" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="13709" r="2021" b="13872">present</wd>

<space/>

<wd l="2098" t="13709" r="2760" b="13877">suggests</wd>

<space/>

<wd l="2837" t="13690" r="3178" b="13858">that,</wd>

<space/>

<wd l="3259" t="13690" r="3701" b="13834">while</wd>

<space/>

<wd l="3773" t="13690" r="4013" b="13834">the</wd>

<space/>

<wd l="4090" t="13690" r="4867" b="13877">clustering</wd>

<space/>

<wd l="4944" t="13690" r="5069" b="13834">is</wd>

<space/>

<wd l="5146" t="13690" r="5803" b="13877">working</wd>

<space/>

</ln>

<ln l="1445" t="13930" r="5803" b="14117" baseLine="14064" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="13930" r="1829" b="14098">well,</wd>

<space/>

<wd l="1910" t="13930" r="2304" b="14074">there</wd>

<space/>

<wd l="2381" t="13973" r="2616" b="14074">are</wd>

<space/>

<wd l="2688" t="13949" r="2942" b="14074">not</wd>

<space/>

<wd l="3014" t="13930" r="3595" b="14117">enough</wd>

<space/>

<wd l="3667" t="13930" r="4320" b="14098">clusters;</wd>

<space/>

<wd l="4406" t="13930" r="4637" b="14074">for</wd>

<space/>

<wd l="4709" t="13930" r="5179" b="14074">250M</wd>

<space/>

<wd l="5251" t="13949" r="5803" b="14098">tweets,</wd>

</ln>

</para>

<para l="1445" t="14170" r="5611" b="14357" alignment="left" spaceBefore="2" lsp="exactly" lspExact="240" language="en">

<ln l="1445" t="14170" r="5611" b="14357" baseLine="14304">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1445" t="14213" r="1675" b="14314">we</wd>

<space/>

<wd l="1733" t="14170" r="2256" b="14314">should</wd>

<space/>

<wd l="2304" t="14213" r="2568" b="14314">use</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1100" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><wd l="2626" t="14218" r="2794" b="14314">m</wd>

<space/>

<wd l="2866" t="14198" r="2990" b="14318">&gt;</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="0"><wd l="3072" t="14174" r="3456" b="14314">2000</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3518" t="14170" r="4483" b="14357">(Derczynski</wd>

<space/>

<wd l="4541" t="14189" r="4680" b="14314">et</wd>

<space/>

<wd l="4733" t="14170" r="4963" b="14338">al.,</wd>

<space/>

<wd l="5026" t="14170" r="5611" b="14347">2015a).</wd>

</run>

</ln>

</para>

<para l="1450" t="14448" r="5808" b="14875" alignment="justified" ri="216" spaceBefore="46" fli="144" lsp="exactly" lspExact="235" language="en">

<ln l="1642" t="14448" r="5808" b="14592" baseLine="14587" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1642" t="14453" r="1901" b="14592">We</wd>

<space/>

<wd l="1949" t="14448" r="2266" b="14592">also</wd>

<space/>

<wd l="2309" t="14448" r="2851" b="14592">looked</wd>

<space/>

<wd l="2899" t="14467" r="3038" b="14592">at</wd>

<space/>

<wd l="3082" t="14448" r="3322" b="14592">the</wd>

<space/>

<wd l="3370" t="14453" r="3907" b="14592">Brown</wd>

<space/>

<wd l="3950" t="14448" r="4550" b="14592">clusters</wd>

<space/>

<wd l="4598" t="14467" r="4987" b="14592">most</wd>

<space/>

<wd l="5030" t="14448" r="5808" b="14592">indicative</wd>

<space/>

</ln>

<ln l="1450" t="14688" r="5803" b="14875" baseLine="14822" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="14688" r="1618" b="14832">of</wd>

<space/>

<wd l="1670" t="14688" r="2117" b="14875">entity</wd>

<space/>

<wd l="2189" t="14707" r="2592" b="14832">starts</wd>

<space/>

<wd l="2654" t="14688" r="2813" b="14827">in</wd>

<space/>

<wd l="2866" t="14688" r="3110" b="14832">the</wd>

<space/>

<wd l="3168" t="14688" r="3614" b="14875">typed</wd>

<space/>

<wd l="3667" t="14688" r="4032" b="14856">task,</wd>

<space/>

<wd l="4099" t="14707" r="4248" b="14832">to</wd>

<space/>

<wd l="4315" t="14707" r="4555" b="14875">get</wd>

<space/>

<wd l="4618" t="14731" r="4795" b="14832">an</wd>

<space/>

<wd l="4858" t="14688" r="5194" b="14832">idea</wd>

<space/>

<wd l="5251" t="14688" r="5424" b="14832">of</wd>

<space/>

<wd l="5472" t="14688" r="5803" b="14832">how</wd>

</ln>

</para>

<para l="1445" t="14928" r="5803" b="15355" alignment="justified" ri="216" lsp="exactly" lspExact="237" language="en">

<ln l="1445" t="14928" r="5798" b="15110" baseLine="15062" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="14928" r="1848" b="15072">these</wd>

<space/>

<wd l="1934" t="14928" r="2534" b="15072">clusters</wd>

<space/>

<wd l="2616" t="14928" r="3182" b="15110">helped.</wd>

<space/>

<wd l="3336" t="14928" r="3917" b="15072">Results</wd>

<space/>

<wd l="4008" t="14971" r="4243" b="15072">are</wd>

<space/>

<wd l="4330" t="14928" r="4838" b="15072">shown</wd>

<space/>

<wd l="4915" t="14928" r="5069" b="15067">in</wd>

<space/>

<wd l="5146" t="14928" r="5582" b="15072">Table</wd>

<space/>

<wd l="5659" t="14928" r="5798" b="15072">4.</wd>

<space/>

</ln>

<ln l="1445" t="15168" r="5803" b="15355" baseLine="15302" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="15168" r="2088" b="15312">Without</wd>

<space/>

<wd l="2150" t="15168" r="2597" b="15355">going</wd>

<space/>

<wd l="2659" t="15168" r="2966" b="15312">into</wd>

<space/>

<wd l="3024" t="15187" r="3278" b="15312">too</wd>

<space/>

<wd l="3336" t="15168" r="3782" b="15312">much</wd>

<space/>

<wd l="3845" t="15168" r="4277" b="15312">detail</wd>

<space/>

<wd l="4339" t="15254" r="4440" b="15269">–</wd>

<space/>

<wd l="4498" t="15168" r="4738" b="15312">the</wd>

<space/>

<wd l="4805" t="15168" r="5333" b="15312">cluster</wd>

<space/>

<wd l="5386" t="15168" r="5803" b="15350">paths</wd>

</ln>

</para>

</column>

<column l="6130" t="1273" r="10555" b="15363">

<para l="6149" t="1334" r="10526" b="4656" alignment="justified" spaceBefore="27" lsp="exactly" lspExact="239" language="en">

<ln l="6154" t="1334" r="10507" b="1531" baseLine="1495">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="6154" t="1406" r="6389" b="1507">are</wd>

<space/>

<wd l="6442" t="1363" r="7286" b="1507">distributed</wd>

<space/>

<wd l="7330" t="1363" r="7685" b="1507">with</wd>

<space/>

<wd l="7733" t="1363" r="8011" b="1507">this</wd>

<space/>

<wd l="8064" t="1363" r="8515" b="1531">work,</wd>

<space/>

<wd l="8573" t="1363" r="8856" b="1507">and</wd>

<space/>

<wd l="8904" t="1406" r="9101" b="1507">on</wd>

<space/>

<wd l="9144" t="1363" r="9384" b="1507">the</wd>

<space/>

</run>

<wd l="9432" t="1334" r="9878" b="1531"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">web,</run>

<run underlined="none" subsuperscript="superscript" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">3</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="9941" t="1363" r="10176" b="1507">for</wd>

<space/>

<wd l="10219" t="1363" r="10507" b="1507">fur-</wd>

</run>

</ln>

<ln l="6149" t="1603" r="10512" b="1786" baseLine="1738" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6149" t="1603" r="6461" b="1747">ther</wd>

<space/>

<wd l="6518" t="1603" r="7498" b="1747">examination</wd>

<space/>

<wd l="7550" t="1690" r="7651" b="1704">–</wd>

<space/>

<wd l="7714" t="1646" r="8122" b="1747">some</wd>

<space/>

<wd l="8179" t="1603" r="8880" b="1786">top-level</wd>

<space/>

<wd l="8942" t="1603" r="9931" b="1747">observations</wd>

<space/>

<wd l="9998" t="1646" r="10267" b="1747">can</wd>

<space/>

<wd l="10325" t="1603" r="10512" b="1747">be</wd>

<space/>

</ln>

<ln l="6149" t="1838" r="10507" b="2026" baseLine="1978" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6149" t="1838" r="6619" b="1982">made.</wd>

<space/>

<wd l="6730" t="1838" r="7282" b="2026">Firstly,</wd>

<space/>

<wd l="7354" t="1838" r="7598" b="1982">the</wd>

<space/>

<wd l="7661" t="1838" r="8443" b="2026">preceding</wd>

<space/>

<wd l="8510" t="1838" r="8918" b="1982">word</wd>

<space/>

<wd l="8981" t="1838" r="9110" b="1982">is</wd>

<space/>

<wd l="9182" t="1838" r="9590" b="1982">often</wd>

<space/>

<wd l="9648" t="1838" r="10507" b="2006">influential;</wd>

<space/>

</ln>

<ln l="6149" t="2078" r="10512" b="2266" baseLine="2218">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="6149" t="2098" r="6490" b="2222">note</wd>

<space/>

<wd l="6547" t="2078" r="6792" b="2222">the</wd>

<space/>

<wd l="6850" t="2078" r="7243" b="2266">large</wd>

<space/>

<wd l="7301" t="2078" r="7910" b="2222">number</wd>

<space/>

<wd l="7973" t="2078" r="8146" b="2222">of</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="8174" t="2126" r="8534" b="2261">prev</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="8664" t="2078" r="9336" b="2222">features.</wd>

<space/>

<wd l="9437" t="2078" r="10205" b="2266">Secondly,</wd>

<space/>

<wd l="10272" t="2078" r="10512" b="2222">the</wd>

<space/>

</run>

</ln>

<ln l="6154" t="2318" r="10517" b="2501" baseLine="2453" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6154" t="2318" r="6754" b="2462">clusters</wd>

<space/>

<wd l="6826" t="2318" r="7478" b="2501">prefixed</wd>

<space/>

<wd l="7560" t="2318" r="8198" b="2462">111100-</wd>

<space/>

<wd l="8280" t="2318" r="9053" b="2462">contained</wd>

<space/>

<wd l="9120" t="2318" r="9595" b="2462">words</wd>

<space/>

<wd l="9677" t="2318" r="10080" b="2462">often</wd>

<space/>

<wd l="10147" t="2318" r="10517" b="2462">used</wd>

<space/>

</ln>

<ln l="6154" t="2558" r="10512" b="2746" baseLine="2693" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6154" t="2602" r="6307" b="2702">as</wd>

<space/>

<wd l="6398" t="2558" r="6638" b="2702">the</wd>

<space/>

<wd l="6725" t="2558" r="7032" b="2702">first</wd>

<space/>

<wd l="7114" t="2578" r="7483" b="2702">term</wd>

<space/>

<wd l="7560" t="2558" r="7718" b="2698">in</wd>

<space/>

<wd l="7795" t="2602" r="8232" b="2746">many</wd>

<space/>

<wd l="8318" t="2558" r="8746" b="2702">kinds</wd>

<space/>

<wd l="8837" t="2558" r="9010" b="2702">of</wd>

<space/>

<wd l="9086" t="2558" r="9566" b="2746">entity,</wd>

<space/>

<wd l="9672" t="2558" r="10512" b="2746">suggesting</wd>

<space/>

</ln>

<ln l="6154" t="2798" r="10517" b="2942" baseLine="2933" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6154" t="2798" r="7205" b="2942">distributional</wd>

<space/>

<wd l="7291" t="2798" r="8165" b="2942">similarities</wd>

<space/>

<wd l="8246" t="2798" r="8400" b="2938">in</wd>

<space/>

<wd l="8477" t="2798" r="8717" b="2942">the</wd>

<space/>

<wd l="8798" t="2798" r="9106" b="2942">first</wd>

<space/>

<wd l="9178" t="2798" r="9658" b="2942">words</wd>

<space/>

<wd l="9744" t="2798" r="9917" b="2942">of</wd>

<space/>

<wd l="9984" t="2798" r="10517" b="2942">named</wd>

<space/>

</ln>

<ln l="6154" t="3034" r="10507" b="3221" baseLine="3173" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6154" t="3034" r="6763" b="3178">entities.</wd>

<space/>

<wd l="6878" t="3038" r="7094" b="3178">As</wd>

<space/>

<wd l="7162" t="3038" r="7699" b="3178">Brown</wd>

<space/>

<wd l="7766" t="3034" r="8544" b="3221">clustering</wd>

<space/>

<wd l="8611" t="3034" r="8736" b="3178">is</wd>

<space/>

<wd l="8808" t="3034" r="9264" b="3178">based</wd>

<space/>

<wd l="9331" t="3077" r="9523" b="3178">on</wd>

<space/>

<wd l="9586" t="3034" r="10157" b="3221">bigram</wd>

<space/>

<wd l="10219" t="3034" r="10507" b="3178">dis-</wd>

</ln>

<ln l="6149" t="3274" r="10526" b="3461" baseLine="3413" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6149" t="3274" r="7214" b="3461">tributionality,</wd>

<space/>

<wd l="7291" t="3274" r="7574" b="3418">this</wd>

<space/>

<wd l="7651" t="3274" r="8203" b="3461">finding</wd>

<space/>

<wd l="8280" t="3274" r="8746" b="3461">aligns</wd>

<space/>

<wd l="8818" t="3274" r="9173" b="3418">with</wd>

<space/>

<wd l="9235" t="3274" r="9475" b="3418">the</wd>

<space/>

<wd l="9547" t="3274" r="10282" b="3418">existence</wd>

<space/>

<wd l="10354" t="3274" r="10526" b="3418">of</wd>

<space/>

</ln>

<ln l="6149" t="3514" r="10512" b="3701" baseLine="3648" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6149" t="3514" r="7454" b="3701">highly-weighted</wd>

<space/>

<wd l="7512" t="3557" r="8203" b="3658">common</wd>

<space/>

<wd l="8256" t="3514" r="9034" b="3701">preceding</wd>

<space/>

<wd l="9091" t="3514" r="9605" b="3658">tokens</wd>

<space/>

<wd l="9667" t="3557" r="10013" b="3658">seen</wd>

<space/>

<wd l="10066" t="3514" r="10219" b="3653">in</wd>

<space/>

<wd l="10272" t="3514" r="10512" b="3658">the</wd>

<space/>

</ln>

<ln l="6149" t="3754" r="10517" b="3941" baseLine="3888" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6149" t="3754" r="6643" b="3898">model</wd>

<space/>

<wd l="6725" t="3754" r="7339" b="3941">weights</wd>

<space/>

<wd l="7421" t="3754" r="7651" b="3898">for</wd>

<space/>

<wd l="7728" t="3754" r="7968" b="3898">the</wd>

<space/>

<wd l="8050" t="3773" r="8664" b="3941">notypes</wd>

<space/>

<wd l="8746" t="3754" r="9106" b="3898">task.</wd>

<space/>

<wd l="9259" t="3754" r="9888" b="3941">Thirdly,</wd>

<space/>

<wd l="9974" t="3758" r="10517" b="3898">Brown</wd>

<space/>

</ln>

<ln l="6154" t="3994" r="10507" b="4181" baseLine="4128" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6154" t="3994" r="6754" b="4138">clusters</wd>

<space/>

<wd l="6811" t="4037" r="7046" b="4138">are</wd>

<space/>

<wd l="7094" t="4037" r="7502" b="4138">more</wd>

<space/>

<wd l="7550" t="3994" r="8035" b="4138">useful</wd>

<space/>

<wd l="8088" t="3994" r="8318" b="4138">for</wd>

<space/>

<wd l="8376" t="4037" r="8784" b="4138">some</wd>

<space/>

<wd l="8837" t="3994" r="9283" b="4181">entity</wd>

<space/>

<wd l="9336" t="4013" r="9749" b="4181">types</wd>

<space/>

<wd l="9802" t="3994" r="10147" b="4138">than</wd>

<space/>

<wd l="10195" t="3994" r="10507" b="4138">oth-</wd>

</ln>

<ln l="6154" t="4234" r="10517" b="4421" baseLine="4368" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6154" t="4277" r="6427" b="4402">ers;</wd>

<space/>

<wd l="6514" t="4234" r="6912" b="4378">there</wd>

<space/>

<wd l="6989" t="4277" r="7224" b="4378">are</wd>

<space/>

<wd l="7296" t="4277" r="7704" b="4378">more</wd>

<space/>

<wd l="7771" t="4234" r="8400" b="4378">features</wd>

<space/>

<wd l="8472" t="4234" r="8707" b="4378">for</wd>

<space/>

<wd l="8774" t="4277" r="9350" b="4416">person,</wd>

<space/>

<wd l="9432" t="4277" r="10152" b="4421">company</wd>

<space/>

<wd l="10229" t="4234" r="10517" b="4378">and</wd>

<space/>

</ln>

<ln l="6154" t="4469" r="8184" b="4656" baseLine="4608" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6154" t="4469" r="6744" b="4656">geo-loc</wd>

<space/>

<wd l="6797" t="4488" r="7210" b="4656">types</wd>

<space/>

<wd l="7267" t="4469" r="7608" b="4613">than</wd>

<space/>

<wd l="7666" t="4469" r="8184" b="4613">others.</wd>

</ln>

</para>

<para l="6130" t="4709" r="10517" b="8438" alignment="justified" fli="216" lsp="exactly" lspExact="239" language="en">

<ln l="6346" t="4709" r="10517" b="4896" baseLine="4848" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6346" t="4714" r="6734" b="4853">Note</wd>

<space/>

<wd l="6806" t="4709" r="7046" b="4853">the</wd>

<space/>

<wd l="7123" t="4709" r="8314" b="4896">large-weighted</wd>

<space/>

<wd l="8395" t="4709" r="9509" b="4891">shallow-depth</wd>

<space/>

<wd l="9581" t="4709" r="10205" b="4853">features</wd>

<space/>

<wd l="10282" t="4709" r="10517" b="4853">for</wd>

<space/>

</ln>

<ln l="6154" t="4949" r="10507" b="5131" baseLine="5083" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6154" t="4949" r="6763" b="5093">entities.</wd>

<space/>

<wd l="6941" t="4949" r="7267" b="5093">One</wd>

<space/>

<wd l="7354" t="4949" r="7478" b="5093">is</wd>

<space/>

<wd l="7570" t="4949" r="7800" b="5093">for</wd>

<space/>

<wd l="7882" t="4949" r="8126" b="5093">the</wd>

<space/>

<wd l="8213" t="4968" r="8650" b="5093">terms</wd>

<space/>

<wd l="8736" t="4949" r="9202" b="5093">found</wd>

<space/>

<wd l="9283" t="4949" r="9792" b="5093">before</wd>

<space/>

<wd l="9883" t="4992" r="9970" b="5093">a</wd>

<space/>

<wd l="10061" t="4973" r="10507" b="5131">sport-</wd>

</ln>

<ln l="6158" t="5189" r="10498" b="5376" baseLine="5323" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6158" t="5208" r="6614" b="5333">steam</wd>

<space/>

<wd l="6696" t="5189" r="7142" b="5376">entity</wd>

<space/>

<wd l="7234" t="5189" r="7541" b="5366">(but</wd>

<space/>

<wd l="7618" t="5208" r="7872" b="5333">not</wd>

<space/>

<wd l="7954" t="5232" r="8040" b="5333">a</wd>

<space/>

<wd l="8112" t="5232" r="8688" b="5371">person,</wd>

<space/>

<wd l="8779" t="5208" r="9120" b="5333">note</wd>

<space/>

<wd l="9202" t="5189" r="9442" b="5333">the</wd>

<space/>

<wd l="9523" t="5232" r="9768" b="5333">-ve</wd>

<space/>

<wd l="9850" t="5189" r="10498" b="5376">weight):</wd>

<space/>

</ln>

<ln l="6130" t="5429" r="10507" b="5616" baseLine="5563">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6130" t="5477" r="6490" b="5611">prev</wd>

<space/>

</run>

<wd l="6542" t="5429" r="7190" b="5611"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">p3x010</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="7330" t="5429" r="7680" b="5573">This</wd>

<space/>

<wd l="7762" t="5429" r="8290" b="5573">cluster</wd>

<space/>

<wd l="8371" t="5429" r="8933" b="5573">subtree</wd>

<space/>

<wd l="9014" t="5429" r="9667" b="5573">contains</wd>

<space/>

<wd l="9749" t="5472" r="10181" b="5616">many</wd>

<space/>

<wd l="10262" t="5429" r="10507" b="5573">ad-</wd>

</run>

</ln>

<ln l="6134" t="5664" r="10507" b="5851" baseLine="5803">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6134" t="5664" r="6792" b="5851">jectives,</wd>

<space/>

<wd l="6854" t="5664" r="7685" b="5846">possessive</wd>

<space/>

<wd l="7742" t="5707" r="8477" b="5846">pronouns</wd>

<space/>

<wd l="8539" t="5664" r="8827" b="5808">and</wd>

<space/>

<wd l="8885" t="5664" r="9811" b="5808">determiners</wd>

<space/>

</run>

<wd l="9883" t="5664" r="10224" b="5842"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">the</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="10291" t="5712" r="10507" b="5832"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">ur</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

</ln>

<ln l="6149" t="5904" r="10517" b="6086" baseLine="6043">

<wd l="6149" t="5904" r="6422" b="6072"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">dis</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="6504" t="5904" r="6917" b="6072"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">each</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="6989" t="5904" r="7378" b="6072"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">mah</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7450" t="5904" r="7685" b="6048">his</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7752" t="5904" r="8136" b="6082">etc.).</wd>

<space/>

<wd l="8251" t="5904" r="8558" b="6048">The</wd>

<space/>

<wd l="8626" t="5923" r="9062" b="6048">terms</wd>

<space/>

<wd l="9134" t="5904" r="9696" b="6086">helpful</wd>

<space/>

<wd l="9763" t="5904" r="10195" b="6048">when</wd>

<space/>

<wd l="10258" t="5923" r="10517" b="6048">not</wd>

<space/>

</run>

</ln>

<ln l="6149" t="6144" r="10512" b="6331" baseLine="6278" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="6144" r="6931" b="6331">preceding</wd>

<space/>

<wd l="6989" t="6144" r="7651" b="6331">geo-locs</wd>

<space/>

<wd l="7709" t="6187" r="8093" b="6288">were</wd>

<space/>

<wd l="8150" t="6144" r="8554" b="6288">close</wd>

<space/>

<wd l="8606" t="6163" r="8755" b="6288">to</wd>

<space/>

<wd l="8808" t="6144" r="9091" b="6288">this</wd>

<space/>

<wd l="9158" t="6144" r="9763" b="6312">subtree,</wd>

<space/>

<wd l="9826" t="6144" r="10512" b="6331">differing</wd>

<space/>

</ln>

<ln l="6154" t="6384" r="10517" b="6571" baseLine="6518">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6154" t="6384" r="6499" b="6571">only</wd>

<space/>

<wd l="6542" t="6384" r="6701" b="6523">in</wd>

<space/>

<wd l="6739" t="6384" r="6922" b="6528">its</wd>

<space/>

<wd l="6970" t="6384" r="8237" b="6571">least-significant</wd>

<space/>

<wd l="8275" t="6384" r="8525" b="6528">bit:</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="8578" t="6432" r="8938" b="6566">prev</wd>

<space/>

</run>

<wd l="8995" t="6384" r="9638" b="6566"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">p3x011</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="9710" t="6384" r="10056" b="6528">This</wd>

<space/>

<wd l="10109" t="6384" r="10517" b="6528">other</wd>

<space/>

</run>

</ln>

<ln l="6149" t="6624" r="10512" b="6811" baseLine="6758" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="6624" r="7339" b="6811">large-weighted</wd>

<space/>

<wd l="7426" t="6624" r="8539" b="6806">shallow-depth</wd>

<space/>

<wd l="8616" t="6624" r="9168" b="6768">feature</wd>

<space/>

<wd l="9245" t="6667" r="9547" b="6768">was</wd>

<space/>

<wd l="9634" t="6624" r="9946" b="6768">also</wd>

<space/>

<wd l="10027" t="6624" r="10512" b="6768">useful</wd>

<space/>

</ln>

<ln l="6149" t="6859" r="10517" b="7046" baseLine="6998" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="6859" r="6379" b="7003">for</wd>

<space/>

<wd l="6456" t="6859" r="7138" b="7046">avoiding</wd>

<space/>

<wd l="7219" t="6859" r="7526" b="7003">first</wd>

<space/>

<wd l="7598" t="6878" r="8035" b="7003">terms</wd>

<space/>

<wd l="8117" t="6859" r="8290" b="7003">of</wd>

<space/>

<wd l="8352" t="6902" r="8885" b="7042">person</wd>

<space/>

<wd l="8962" t="6859" r="9571" b="7003">entities.</wd>

<space/>

<wd l="9710" t="6864" r="9902" b="7003">Its</wd>

<space/>

<wd l="9989" t="6859" r="10517" b="7003">cluster</wd>

<space/>

</ln>

<ln l="6158" t="7099" r="10507" b="7282" baseLine="7238">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6158" t="7099" r="6720" b="7243">subtree</wd>

<space/>

<wd l="6816" t="7099" r="7469" b="7243">contains</wd>

<space/>

<wd l="7565" t="7142" r="8261" b="7243">common</wd>

<space/>

<wd l="8347" t="7142" r="8818" b="7243">nouns</wd>

<space/>

<wd l="8914" t="7099" r="9197" b="7243">and</wd>

<space/>

<wd l="9288" t="7099" r="10018" b="7282">qualifiers</wd>

<space/>

</run>

<wd l="10118" t="7099" r="10507" b="7277"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">one</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

</ln>

<ln l="6130" t="7339" r="10517" b="7526" baseLine="7474">

<wd l="6130" t="7339" r="6725" b="7522"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">people</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="6806" t="7339" r="7248" b="7522"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">good</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="7330" t="7339" r="7661" b="7507"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">shit</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="7742" t="7339" r="8074" b="7522"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">day</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="8155" t="7368" r="8611" b="7522"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">great</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

<wd l="8698" t="7339" r="9168" b="7517"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">little</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">),</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="9250" t="7339" r="9806" b="7526">though</wd>

<space/>

<wd l="9874" t="7339" r="9989" b="7483">it</wd>

<space/>

<wd l="10056" t="7339" r="10181" b="7483">is</wd>

<space/>

<wd l="10258" t="7358" r="10517" b="7483">not</wd>

<space/>

</run>

</ln>

<ln l="6149" t="7579" r="10507" b="7766" baseLine="7714" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="7579" r="7142" b="7766">immediately</wd>

<space/>

<wd l="7214" t="7579" r="7598" b="7723">clear</wd>

<space/>

<wd l="7661" t="7579" r="7997" b="7723">how</wd>

<space/>

<wd l="8069" t="7579" r="8472" b="7723">these</wd>

<space/>

<wd l="8544" t="7598" r="8981" b="7723">terms</wd>

<space/>

<wd l="9053" t="7622" r="9437" b="7723">were</wd>

<space/>

<wd l="9504" t="7579" r="10114" b="7762">helpful;</wd>

<space/>

<wd l="10195" t="7622" r="10507" b="7762">per-</wd>

</ln>

<ln l="6149" t="7819" r="10507" b="8002" baseLine="7954" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="7819" r="6509" b="8002">haps</wd>

<space/>

<wd l="6562" t="7819" r="6802" b="7963">the</wd>

<space/>

<wd l="6850" t="7819" r="7790" b="8002">prominence</wd>

<space/>

<wd l="7843" t="7819" r="8016" b="7963">of</wd>

<space/>

<wd l="8050" t="7819" r="8333" b="7963">this</wd>

<space/>

<wd l="8395" t="7819" r="8962" b="7963">subtree</wd>

<space/>

<wd l="9010" t="7819" r="9562" b="7963">feature</wd>

<space/>

<wd l="9610" t="7819" r="9734" b="7963">is</wd>

<space/>

<wd l="9792" t="7819" r="10075" b="7963">due</wd>

<space/>

<wd l="10123" t="7838" r="10277" b="7963">to</wd>

<space/>

<wd l="10325" t="7819" r="10507" b="7963">its</wd>

<space/>

</ln>

<ln l="6149" t="8059" r="10512" b="8246" baseLine="8194" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="8059" r="6936" b="8246">frequency</wd>

<space/>

<wd l="7003" t="8059" r="7474" b="8227">alone,</wd>

<space/>

<wd l="7546" t="8059" r="7829" b="8203">and</wd>

<space/>

<wd l="7886" t="8059" r="8338" b="8203">better</wd>

<space/>

<wd l="8395" t="8059" r="9490" b="8246">regularisation</wd>

<space/>

<wd l="9547" t="8059" r="9672" b="8203">is</wd>

<space/>

<wd l="9739" t="8059" r="10301" b="8203">needed</wd>

<space/>

<wd l="10358" t="8078" r="10512" b="8203">to</wd>

<space/>

</ln>

<ln l="6149" t="8294" r="6878" b="8438" baseLine="8434" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="8294" r="6677" b="8438">handle</wd>

<space/>

<wd l="6730" t="8294" r="6878" b="8438">it.</wd>

</ln>

</para>

<para l="6149" t="8683" r="7829" b="8827" alignment="left" spaceBefore="153" lsp="exactly" lspExact="235" language="en">

<ln l="6149" t="8683" r="7829" b="8827" baseLine="8818" bold="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="7">

<wd l="6149" t="8683" r="6394" b="8827">5.2</wd>

<space/>

<wd l="6600" t="8683" r="7018" b="8827">Gold</wd>

<space/>

<wd l="7070" t="8683" r="7829" b="8827">standard</wd>

</ln>

</para>

<para l="6149" t="8995" r="10517" b="11573" alignment="justified" spaceBefore="59" lsp="exactly" lspExact="240" language="en">

<ln l="6149" t="8995" r="10507" b="9182" baseLine="9130" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="8995" r="6624" b="9139">When</wd>

<space/>

<wd l="6734" t="8995" r="7603" b="9182">developing</wd>

<space/>

<wd l="7709" t="8995" r="7949" b="9139">the</wd>

<space/>

<wd l="8064" t="9014" r="8650" b="9182">system,</wd>

<space/>

<wd l="8774" t="9038" r="9005" b="9139">we</wd>

<space/>

<wd l="9115" t="8995" r="10085" b="9139">encountered</wd>

<space/>

<wd l="10195" t="9038" r="10507" b="9139">sev-</wd>

</ln>

<ln l="6154" t="9235" r="10502" b="9422" baseLine="9370" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6154" t="9235" r="6446" b="9379">eral</wd>

<space/>

<wd l="6499" t="9235" r="7234" b="9418">problems</wd>

<space/>

<wd l="7296" t="9235" r="7579" b="9379">and</wd>

<space/>

<wd l="7627" t="9235" r="8827" b="9379">inconsistencies</wd>

<space/>

<wd l="8885" t="9235" r="9043" b="9374">in</wd>

<space/>

<wd l="9091" t="9235" r="9331" b="9379">the</wd>

<space/>

<wd l="9389" t="9235" r="9739" b="9422">gold</wd>

<space/>

<wd l="9797" t="9235" r="10502" b="9379">standard.</wd>

<space/>

</ln>

<ln l="6149" t="9470" r="10507" b="9658" baseLine="9610" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="9470" r="6624" b="9614">These</wd>

<space/>

<wd l="6686" t="9470" r="7157" b="9614">issues</wd>

<space/>

<wd l="7234" t="9514" r="7469" b="9614">are</wd>

<space/>

<wd l="7536" t="9470" r="7997" b="9658">partly</wd>

<space/>

<wd l="8069" t="9514" r="8155" b="9614">a</wd>

<space/>

<wd l="8222" t="9470" r="8798" b="9658">general</wd>

<space/>

<wd l="8866" t="9470" r="9533" b="9653">problem</wd>

<space/>

<wd l="9600" t="9470" r="9773" b="9614">of</wd>

<space/>

<wd l="9830" t="9470" r="10507" b="9653">develop-</wd>

</ln>

<ln l="6149" t="9710" r="10517" b="9898" baseLine="9850" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="9710" r="6398" b="9898">ing</wd>

<space/>

<wd l="6470" t="9710" r="6821" b="9898">gold</wd>

<space/>

<wd l="6888" t="9710" r="7675" b="9878">standards,</wd>

<space/>

<wd l="7742" t="9710" r="7978" b="9854">i.e.</wd>

<space/>

<wd l="8078" t="9710" r="8323" b="9854">the</wd>

<space/>

<wd l="8386" t="9754" r="8794" b="9854">more</wd>

<space/>

<wd l="8861" t="9710" r="9830" b="9893">complicated</wd>

<space/>

<wd l="9888" t="9710" r="10128" b="9854">the</wd>

<space/>

<wd l="10190" t="9710" r="10517" b="9854">task</wd>

<space/>

</ln>

<ln l="6149" t="9950" r="10507" b="10138" baseLine="10085" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="9950" r="6326" b="10118">is,</wd>

<space/>

<wd l="6422" t="9950" r="6662" b="10094">the</wd>

<space/>

<wd l="6744" t="9994" r="7152" b="10094">more</wd>

<space/>

<wd l="7238" t="9950" r="7853" b="10094">humans</wd>

<space/>

<wd l="7939" t="9950" r="8280" b="10094">tend</wd>

<space/>

<wd l="8362" t="9970" r="8515" b="10094">to</wd>

<space/>

<wd l="8602" t="9950" r="9259" b="10138">disagree</wd>

<space/>

<wd l="9350" t="9994" r="9542" b="10094">on</wd>

<space/>

<wd l="9629" t="9970" r="10181" b="10094">correct</wd>

<space/>

<wd l="10262" t="9994" r="10507" b="10094">an-</wd>

</ln>

<ln l="6158" t="10190" r="10517" b="10368" baseLine="10325" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6158" t="10234" r="6595" b="10334">swers</wd>

<space/>

<wd l="6691" t="10190" r="7234" b="10368">(Tissot</wd>

<space/>

<wd l="7315" t="10210" r="7459" b="10334">et</wd>

<space/>

<wd l="7541" t="10190" r="7771" b="10358">al.,</wd>

<space/>

<wd l="7872" t="10190" r="8371" b="10368">2015).</wd>

<space/>

<wd l="8539" t="10195" r="8813" b="10334">For</wd>

<space/>

<wd l="8894" t="10190" r="9466" b="10334">Twitter</wd>

<space/>

<wd l="9547" t="10190" r="10075" b="10334">NERC</wd>

<space/>

<wd l="10162" t="10190" r="10517" b="10334">with</wd>

<space/>

</ln>

<ln l="6168" t="10430" r="10507" b="10618" baseLine="10565" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6168" t="10430" r="6346" b="10574">10</wd>

<space/>

<wd l="6422" t="10450" r="6888" b="10618">types,</wd>

<space/>

<wd l="6984" t="10474" r="7392" b="10574">some</wd>

<space/>

<wd l="7478" t="10430" r="7646" b="10574">of</wd>

<space/>

<wd l="7714" t="10430" r="7954" b="10574">the</wd>

<space/>

<wd l="8030" t="10430" r="8544" b="10574">tokens</wd>

<space/>

<wd l="8630" t="10474" r="8866" b="10574">are</wd>

<space/>

<wd l="8947" t="10474" r="9293" b="10618">very</wd>

<space/>

<wd l="9374" t="10430" r="9998" b="10574">difficult</wd>

<space/>

<wd l="10075" t="10450" r="10224" b="10574">to</wd>

<space/>

<wd l="10306" t="10430" r="10507" b="10574">la-</wd>

</ln>

<ln l="6149" t="10670" r="10512" b="10858" baseLine="10805" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="10670" r="6389" b="10814">bel</wd>

<space/>

<wd l="6485" t="10670" r="7114" b="10814">because</wd>

<space/>

<wd l="7210" t="10670" r="7450" b="10814">the</wd>

<space/>

<wd l="7550" t="10690" r="8131" b="10814">context</wd>

<space/>

<wd l="8222" t="10670" r="8856" b="10814">window</wd>

<space/>

<wd l="8952" t="10670" r="9082" b="10814">is</wd>

<space/>

<wd l="9178" t="10714" r="9528" b="10858">very</wd>

<space/>

<wd l="9634" t="10670" r="10051" b="10814">small</wd>

<space/>

<wd l="10157" t="10670" r="10512" b="10848">(140</wd>

<space/>

</ln>

<ln l="6154" t="10906" r="10512" b="11093" baseLine="11045" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6154" t="10906" r="7066" b="11083">characters),</wd>

<space/>

<wd l="7152" t="10906" r="7637" b="11050">which</wd>

<space/>

<wd l="7709" t="10906" r="8054" b="11050">then</wd>

<space/>

<wd l="8131" t="10906" r="8443" b="11050">also</wd>

<space/>

<wd l="8520" t="10906" r="8923" b="11050">leads</wd>

<space/>

<wd l="9000" t="10925" r="9149" b="11050">to</wd>

<space/>

<wd l="9230" t="10949" r="9994" b="11093">acronyms</wd>

<space/>

<wd l="10070" t="10906" r="10512" b="11093">being</wd>

<space/>

</ln>

<ln l="6149" t="11146" r="10517" b="11333" baseLine="11285" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="11146" r="6514" b="11290">used</wd>

<space/>

<wd l="6571" t="11189" r="6917" b="11333">very</wd>

<space/>

<wd l="6979" t="11146" r="7795" b="11333">frequently</wd>

<space/>

<wd l="7853" t="11165" r="8006" b="11290">to</wd>

<space/>

<wd l="8074" t="11189" r="8410" b="11290">save</wd>

<space/>

<wd l="8477" t="11189" r="8952" b="11328">space,</wd>

<space/>

<wd l="9024" t="11146" r="9307" b="11290">and</wd>

<space/>

<wd l="9365" t="11146" r="9994" b="11290">because</wd>

<space/>

<wd l="10051" t="11146" r="10517" b="11290">world</wd>

<space/>

</ln>

<ln l="6149" t="11386" r="9893" b="11573" baseLine="11520" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="11386" r="7018" b="11573">knowledge</wd>

<space/>

<wd l="7070" t="11386" r="7512" b="11530">about</wd>

<space/>

<wd l="7570" t="11410" r="8078" b="11568">sports,</wd>

<space/>

<wd l="8136" t="11386" r="8611" b="11530">music</wd>

<space/>

<wd l="8664" t="11405" r="8933" b="11530">etc.</wd>

<space/>

<wd l="9005" t="11386" r="9134" b="11530">is</wd>

<space/>

<wd l="9187" t="11386" r="9893" b="11568">required.</wd>

</ln>

</para>

<para l="6154" t="11626" r="10512" b="12053" alignment="justified" fli="216" lsp="exactly" lspExact="240" language="en">

<ln l="6346" t="11626" r="10512" b="11813" baseLine="11760" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6346" t="11630" r="6514" b="11765">In</wd>

<space/>

<wd l="6557" t="11626" r="7354" b="11808">particular,</wd>

<space/>

<wd l="7402" t="11626" r="7646" b="11770">the</wd>

<space/>

<wd l="7690" t="11626" r="8453" b="11813">following</wd>

<space/>

<wd l="8506" t="11669" r="9038" b="11813">groups</wd>

<space/>

<wd l="9086" t="11626" r="9259" b="11770">of</wd>

<space/>

<wd l="9293" t="11626" r="10027" b="11808">problems</wd>

<space/>

<wd l="10075" t="11626" r="10229" b="11765">in</wd>

<space/>

<wd l="10272" t="11626" r="10512" b="11770">the</wd>

<space/>

</ln>

<ln l="6154" t="11866" r="9758" b="12053" baseLine="12000" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6154" t="11866" r="6504" b="12053">gold</wd>

<space/>

<wd l="6562" t="11866" r="7229" b="12010">standard</wd>

<space/>

<wd l="7277" t="11866" r="7891" b="12053">training</wd>

<space/>

<wd l="7954" t="11909" r="8472" b="12048">corpus</wd>

<space/>

<wd l="8530" t="11909" r="8914" b="12010">were</wd>

<space/>

<wd l="8966" t="11866" r="9758" b="12010">identified:</wd>

</ln>

</para>

<para l="6149" t="12226" r="10517" b="14803" alignment="justified" spaceBefore="110" spaceAfter="72" lsp="exactly" lspExact="240" language="en">

<ln l="6149" t="12226" r="10512" b="12413" baseLine="12360">

<run bold="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="6149" t="12226" r="6677" b="12370">Broad</wd>

<space/>

<wd l="6749" t="12226" r="7646" b="12408">categories:</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="7862" t="12226" r="8347" b="12370">While</wd>

<space/>

<wd l="8424" t="12269" r="8837" b="12370">some</wd>

<space/>

<wd l="8909" t="12226" r="9082" b="12370">of</wd>

<space/>

<wd l="9139" t="12226" r="9379" b="12370">the</wd>

<space/>

<wd l="9451" t="12230" r="9715" b="12370">NE</wd>

<space/>

<wd l="9782" t="12245" r="10195" b="12413">types</wd>

<space/>

<wd l="10277" t="12269" r="10512" b="12370">are</wd>

<space/>

</run>

</ln>

<ln l="6149" t="12466" r="10512" b="12653" baseLine="12600" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6149" t="12466" r="7147" b="12610">well-defined</wd>

<space/>

<wd l="7195" t="12466" r="7526" b="12653">(e.g.</wd>

<space/>

<wd l="7594" t="12509" r="8170" b="12648">person,</wd>

<space/>

<wd l="8222" t="12466" r="8923" b="12653">geo-loc),</wd>

<space/>

<wd l="8976" t="12466" r="9384" b="12610">other</wd>

<space/>

<wd l="9422" t="12485" r="9835" b="12653">types</wd>

<space/>

<wd l="9883" t="12509" r="10123" b="12610">are</wd>

<space/>

<wd l="10162" t="12509" r="10512" b="12653">very</wd>

<space/>

</ln>

<ln l="6149" t="12701" r="10507" b="12888" baseLine="12840" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6149" t="12701" r="6605" b="12845">broad</wd>

<space/>

<wd l="6677" t="12701" r="6960" b="12845">and</wd>

<space/>

<wd l="7032" t="12701" r="7747" b="12845">therefore</wd>

<space/>

<wd l="7819" t="12744" r="8184" b="12883">pose</wd>

<space/>

<wd l="8261" t="12744" r="8347" b="12845">a</wd>

<space/>

<wd l="8414" t="12701" r="8664" b="12888">big</wd>

<space/>

<wd l="8746" t="12701" r="9542" b="12888">challenge.</wd>

<space/>

<wd l="9677" t="12701" r="10022" b="12845">This</wd>

<space/>

<wd l="10099" t="12701" r="10229" b="12845">is</wd>

<space/>

<wd l="10310" t="12701" r="10507" b="12845">al-</wd>

</ln>

<ln l="6149" t="12941" r="10517" b="13128" baseLine="13080" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6149" t="12941" r="6586" b="13128">ready</wd>

<space/>

<wd l="6658" t="12941" r="7238" b="13085">evident</wd>

<space/>

<wd l="7296" t="12941" r="7493" b="13128">by</wd>

<space/>

<wd l="7560" t="12941" r="7800" b="13085">the</wd>

<space/>

<wd l="7862" t="12941" r="8472" b="13085">number</wd>

<space/>

<wd l="8539" t="12941" r="8712" b="13085">of</wd>

<space/>

<wd l="8770" t="12960" r="9552" b="13128">gazetteers</wd>

<space/>

<wd l="9629" t="12941" r="10200" b="13085">created</wd>

<space/>

<wd l="10258" t="12984" r="10517" b="13123">per</wd>

<space/>

</ln>

<ln l="6149" t="13181" r="10517" b="13368" baseLine="13315" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6149" t="13200" r="6490" b="13368">type</wd>

<space/>

<wd l="6566" t="13181" r="6874" b="13358">(see</wd>

<space/>

<wd l="6946" t="13181" r="7378" b="13325">Table</wd>

<space/>

<wd l="7464" t="13181" r="7656" b="13358">1),</wd>

<space/>

<wd l="7733" t="13181" r="7963" b="13325">i.e.</wd>

<space/>

<wd l="8083" t="13181" r="8501" b="13325">those</wd>

<space/>

<wd l="8573" t="13181" r="9024" b="13325">broad</wd>

<space/>

<wd l="9096" t="13181" r="9888" b="13368">categories</wd>

<space/>

<wd l="9965" t="13181" r="10517" b="13325">consist</wd>

<space/>

</ln>

<ln l="6154" t="13421" r="10507" b="13608" baseLine="13555" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6154" t="13421" r="6326" b="13565">of</wd>

<space/>

<wd l="6389" t="13464" r="6826" b="13608">many</wd>

<space/>

<wd l="6912" t="13421" r="7589" b="13565">different</wd>

<space/>

<wd l="7670" t="13421" r="8400" b="13608">subtypes.</wd>

<space/>

<wd l="8554" t="13421" r="8990" b="13565">Since</wd>

<space/>

<wd l="9067" t="13421" r="9312" b="13565">the</wd>

<space/>

<wd l="9389" t="13421" r="10003" b="13608">training</wd>

<space/>

<wd l="10094" t="13440" r="10306" b="13565">set</wd>

<space/>

<wd l="10382" t="13421" r="10507" b="13565">is</wd>

<space/>

</ln>

<ln l="6149" t="13661" r="10517" b="13848" baseLine="13795" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6149" t="13704" r="6494" b="13848">very</wd>

<space/>

<wd l="6576" t="13661" r="7042" b="13829">small,</wd>

<space/>

<wd l="7128" t="13661" r="7474" b="13848">only</wd>

<space/>

<wd l="7550" t="13704" r="7637" b="13805">a</wd>

<space/>

<wd l="7699" t="13661" r="8357" b="13805">handfull</wd>

<space/>

<wd l="8438" t="13661" r="8611" b="13805">of</wd>

<space/>

<wd l="8674" t="13661" r="9413" b="13843">examples</wd>

<space/>

<wd l="9490" t="13704" r="9725" b="13805">are</wd>

<space/>

<wd l="9802" t="13661" r="10517" b="13805">observed</wd>

<space/>

</ln>

<ln l="6149" t="13896" r="10507" b="14083" baseLine="14035" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6149" t="13896" r="6379" b="14040">for</wd>

<space/>

<wd l="6432" t="13896" r="6792" b="14040">each</wd>

<space/>

<wd l="6845" t="13896" r="7454" b="14083">subtype</wd>

<space/>

<wd l="7512" t="13896" r="7843" b="14083">(e.g.</wd>

<space/>

<wd l="7915" t="13896" r="8357" b="14040">video</wd>

<space/>

<wd l="8410" t="13896" r="8942" b="14083">game),</wd>

<space/>

<wd l="8995" t="13896" r="9485" b="14040">which</wd>

<space/>

<wd l="9528" t="13896" r="10032" b="14040">makes</wd>

<space/>

<wd l="10080" t="13896" r="10507" b="14040">train-</wd>

</ln>

<ln l="6149" t="14136" r="10512" b="14323" baseLine="14275" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6149" t="14136" r="6398" b="14323">ing</wd>

<space/>

<wd l="6475" t="14179" r="6562" b="14280">a</wd>

<space/>

<wd l="6629" t="14136" r="7334" b="14280">classifier</wd>

<space/>

<wd l="7402" t="14136" r="7632" b="14280">for</wd>

<space/>

<wd l="7699" t="14136" r="8117" b="14280">those</wd>

<space/>

<wd l="8184" t="14155" r="8602" b="14323">types</wd>

<space/>

<wd l="8674" t="14179" r="9019" b="14323">very</wd>

<space/>

<wd l="9096" t="14136" r="10061" b="14323">challenging.</wd>

<space/>

<wd l="10186" t="14136" r="10512" b="14280">One</wd>

<space/>

</ln>

<ln l="6154" t="14376" r="10512" b="14563" baseLine="14510" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6154" t="14376" r="6326" b="14520">of</wd>

<space/>

<wd l="6384" t="14376" r="6624" b="14520">the</wd>

<space/>

<wd l="6701" t="14395" r="7090" b="14520">most</wd>

<space/>

<wd l="7162" t="14376" r="8083" b="14563">challenging</wd>

<space/>

<wd l="8155" t="14395" r="8573" b="14563">types</wd>

<space/>

<wd l="8650" t="14419" r="8952" b="14520">was</wd>

<space/>

<wd l="9024" t="14376" r="9754" b="14558">products,</wd>

<space/>

<wd l="9840" t="14419" r="9998" b="14520">as</wd>

<space/>

<wd l="10075" t="14419" r="10512" b="14563">many</wd>

<space/>

</ln>

<ln l="6154" t="14616" r="8707" b="14803" baseLine="14750" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6154" t="14616" r="6830" b="14760">different</wd>

<space/>

<wd l="6878" t="14616" r="7363" b="14803">things</wd>

<space/>

<wd l="7421" t="14659" r="7690" b="14760">can</wd>

<space/>

<wd l="7742" t="14616" r="7930" b="14760">be</wd>

<space/>

<wd l="7982" t="14616" r="8707" b="14798">products.</wd>

</ln>

</para>

<rulerline l="6130" t="14894" r="7349" b="14894" type="single" width="10" color="000000"/>

<para l="6149" t="14947" r="10550" b="15312" alignment="justified" spaceBefore="61" fli="216" lsp="exactly" lspExact="200" language="en">

<ln l="6408" t="14947" r="10550" b="15149" baseLine="15099">

<wd l="6408" t="14947" r="6730" b="15110"><run underlined="none" subsuperscript="superscript" fontSize="600" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">3</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">See</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="6763" t="14981" r="10550" b="15149">http://derczynski.com/sheffield/resources/gha.250M-</wd>

</run>

</ln>

<ln l="6149" t="15173" r="6826" b="15312" baseLine="15302" underlined="none" subsuperscript="none" fontSize="900" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6149" t="15182" r="6826" b="15312">c2000.tar</wd>

</ln>

</para>

</column>

</section>

<dd l="5771" t="15746" r="6161" b="15975">

<para l="5804" t="15782" r="6128" b="15946" alignment="left" lsp="exactly" lspExact="223" language="en">

<ln l="5870" t="15782" r="6062" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="950" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="23">

<wd l="5870" t="15787" r="6062" b="15946">51</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4306.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1428" marginTop="1284" marginRight="1366" marginBottom="1302" offsetX="-30" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1428" t="1284" r="10543" b="15363">

<column l="1428" t="1284" r="6026" b="15363">

<para l="1445" t="1363" r="5813" b="3461" alignment="justified" ri="216" spaceBefore="31" lsp="exactly" lspExact="239" language="en">

<ln l="1450" t="1363" r="5803" b="1550" baseLine="1498">

<run bold="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1450" t="1363" r="2515" b="1546">Overlapping</wd>

<space/>

<wd l="2573" t="1373" r="3067" b="1546">types:</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="3283" t="1363" r="3730" b="1507">Some</wd>

<space/>

<wd l="3792" t="1368" r="4128" b="1507">NEs</wd>

<space/>

<wd l="4190" t="1363" r="4728" b="1550">belong</wd>

<space/>

<wd l="4786" t="1382" r="4939" b="1507">to</wd>

<space/>

<wd l="5002" t="1406" r="5405" b="1507">more</wd>

<space/>

<wd l="5467" t="1363" r="5803" b="1507">than</wd>

<space/>

</run>

</ln>

<ln l="1450" t="1603" r="5808" b="1790" baseLine="1738" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="1646" r="1728" b="1747">one</wd>

<space/>

<wd l="1771" t="1622" r="2155" b="1790">type,</wd>

<space/>

<wd l="2203" t="1603" r="2688" b="1747">which</wd>

<space/>

<wd l="2726" t="1603" r="3230" b="1747">makes</wd>

<space/>

<wd l="3274" t="1603" r="3514" b="1747">the</wd>

<space/>

<wd l="3557" t="1603" r="4594" b="1747">classification</wd>

<space/>

<wd l="4632" t="1603" r="4954" b="1747">task</wd>

<space/>

<wd l="4997" t="1646" r="5362" b="1747">even</wd>

<space/>

<wd l="5400" t="1646" r="5808" b="1747">more</wd>

<space/>

</ln>

<ln l="1450" t="1838" r="5808" b="2026" baseLine="1978" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="1838" r="2107" b="1982">difficult.</wd>

<space/>

<wd l="2323" t="1843" r="2597" b="1982">For</wd>

<space/>

<wd l="2698" t="1838" r="3408" b="2021">example,</wd>

<space/>

<wd l="3523" t="1838" r="3634" b="1982">it</wd>

<space/>

<wd l="3730" t="1838" r="3859" b="1982">is</wd>

<space/>

<wd l="3965" t="1838" r="4589" b="1982">difficult</wd>

<space/>

<wd l="4680" t="1858" r="4834" b="1982">to</wd>

<space/>

<wd l="4939" t="1838" r="5808" b="2026">distinguish</wd>

<space/>

</ln>

<ln l="1445" t="2078" r="5808" b="2261" baseLine="2218" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="2078" r="2107" b="2222">between</wd>

<space/>

<wd l="2184" t="2078" r="3024" b="2261">companies</wd>

<space/>

<wd l="3106" t="2078" r="3394" b="2222">and</wd>

<space/>

<wd l="3461" t="2078" r="3830" b="2222">their</wd>

<space/>

<wd l="3898" t="2078" r="4579" b="2261">products</wd>

<space/>

<wd l="4656" t="2078" r="5011" b="2222">with</wd>

<space/>

<wd l="5083" t="2078" r="5323" b="2222">the</wd>

<space/>

<wd l="5410" t="2122" r="5808" b="2222">same</wd>

<space/>

</ln>

<ln l="1445" t="2318" r="5808" b="2501" baseLine="2453" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="2362" r="1915" b="2462">name.</wd>

<space/>

<wd l="2021" t="2318" r="2486" b="2462">There</wd>

<space/>

<wd l="2554" t="2362" r="2789" b="2462">are</wd>

<space/>

<wd l="2861" t="2318" r="3173" b="2462">also</wd>

<space/>

<wd l="3240" t="2318" r="4205" b="2462">inconsistent</wd>

<space/>

<wd l="4267" t="2318" r="5006" b="2501">examples</wd>

<space/>

<wd l="5078" t="2318" r="5251" b="2462">of</wd>

<space/>

<wd l="5304" t="2318" r="5587" b="2462">this</wd>

<space/>

<wd l="5654" t="2318" r="5808" b="2458">in</wd>

<space/>

</ln>

<ln l="1445" t="2558" r="5798" b="2746" baseLine="2693">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1445" t="2558" r="1685" b="2702">the</wd>

<space/>

<wd l="1733" t="2558" r="2083" b="2746">gold</wd>

<space/>

<wd l="2136" t="2558" r="2842" b="2726">standard,</wd>

<space/>

<wd l="2899" t="2602" r="3168" b="2746">e.g.</wd>

<space/>

</run>

<wd l="3245" t="2558" r="3432" b="2702"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">“</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">I</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">:</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="3499" t="2558" r="3634" b="2702">O</wd>

<space/>

</run>

<wd l="3648" t="2563" r="4003" b="2741"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">just</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">:</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="4085" t="2558" r="4219" b="2702">O</wd>

<space/>

</run>

<wd l="4267" t="2558" r="4858" b="2741"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">bought</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">:</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="4939" t="2558" r="5074" b="2702">O</wd>

<space/>

</run>

<wd l="5117" t="2563" r="5534" b="2702"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">Dior</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">:</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="5611" t="2563" r="5798" b="2698">B-</wd>

</run>

</ln>

<ln l="1445" t="2798" r="5808" b="2981" baseLine="2933">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1445" t="2798" r="2054" b="2981">product</wd>

<space/>

</run>

<wd l="2117" t="2842" r="2837" b="2942"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">mascara</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">:</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="2947" t="2798" r="3216" b="2942">O”.</wd>

<space/>

<wd l="3288" t="2803" r="3456" b="2938">In</wd>

<space/>

<wd l="3518" t="2798" r="3802" b="2942">this</wd>

<space/>

<wd l="3874" t="2798" r="4584" b="2981">example,</wd>

<space/>

</run>

<wd l="4666" t="2798" r="5208" b="2942"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">“</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">Dior</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">”</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="5285" t="2798" r="5808" b="2942">should</wd>

<space/>

</run>

</ln>

<ln l="1445" t="3034" r="5813" b="3221" baseLine="3173">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1445" t="3034" r="1627" b="3178">be</wd>

<space/>

<wd l="1709" t="3034" r="2477" b="3178">annotated</wd>

<space/>

<wd l="2554" t="3077" r="2707" b="3178">as</wd>

<space/>

<wd l="2789" t="3077" r="2875" b="3178">a</wd>

<space/>

<wd l="2947" t="3077" r="3701" b="3221">company,</wd>

<space/>

<wd l="3782" t="3034" r="4037" b="3178">but</wd>

<space/>

</run>

<wd l="4109" t="3034" r="4579" b="3178"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">“</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">Dior</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

<wd l="4642" t="3034" r="5405" b="3178"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">mascara</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">”</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="5486" t="3077" r="5645" b="3178">as</wd>

<space/>

<wd l="5726" t="3077" r="5813" b="3178">a</wd>

<space/>

</run>

</ln>

<ln l="1445" t="3274" r="3638" b="3461" baseLine="3413" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="3274" r="2054" b="3456">product</wd>

<space/>

<wd l="2102" t="3274" r="2491" b="3418">from</wd>

<space/>

<wd l="2539" t="3274" r="2842" b="3418">that</wd>

<space/>

<wd l="2894" t="3317" r="3638" b="3461">company.</wd>

</ln>

</para>

<para l="1445" t="3653" r="5813" b="6950" alignment="justified" ri="216" spaceBefore="153" lsp="exactly" lspExact="238" language="en">

<ln l="1450" t="3653" r="5813" b="3840" baseLine="3792">

<run bold="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1450" t="3653" r="1771" b="3797">The</wd>

<space/>

<wd l="1838" t="3662" r="2198" b="3835">type</wd>

<space/>

</run>

<wd l="2261" t="3653" r="2746" b="3797"><run italic="true" underlined="none" subsuperscript="none" fontSize="950" fontFace="Times New Roman" fontFamily="swiss" fontPitch="variable" spacing="-2">other</run>

<run bold="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">:</run>

</wd>

<run bold="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="2966" t="3653" r="3403" b="3797">Since</wd>

<space/>

<wd l="3470" t="3653" r="4306" b="3797">annotation</wd>

<space/>

<wd l="4373" t="3653" r="5179" b="3840">guidelines</wd>

<space/>

<wd l="5251" t="3696" r="5491" b="3797">are</wd>

<space/>

<wd l="5554" t="3672" r="5813" b="3797">not</wd>

<space/>

</run>

</ln>

<ln l="1450" t="3893" r="5798" b="4080" baseLine="4027" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="3893" r="2150" b="4037">available</wd>

<space/>

<wd l="2208" t="3893" r="2443" b="4037">for</wd>

<space/>

<wd l="2496" t="3893" r="2736" b="4037">the</wd>

<space/>

<wd l="2798" t="3893" r="3149" b="4080">gold</wd>

<space/>

<wd l="3216" t="3893" r="3922" b="4061">standard,</wd>

<space/>

<wd l="3984" t="3936" r="4214" b="4037">we</wd>

<space/>

<wd l="4272" t="3893" r="4579" b="4080">rely</wd>

<space/>

<wd l="4642" t="3893" r="5242" b="4080">entirely</wd>

<space/>

<wd l="5309" t="3936" r="5501" b="4037">on</wd>

<space/>

<wd l="5563" t="3936" r="5798" b="4037">ex-</wd>

</ln>

<ln l="1450" t="4133" r="5798" b="4320" baseLine="4267" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="4133" r="2002" b="4315">amples</wd>

<space/>

<wd l="2059" t="4133" r="2213" b="4272">in</wd>

<space/>

<wd l="2261" t="4133" r="2501" b="4277">the</wd>

<space/>

<wd l="2554" t="4133" r="3173" b="4320">training</wd>

<space/>

<wd l="3235" t="4152" r="3446" b="4277">set</wd>

<space/>

<wd l="3494" t="4152" r="3648" b="4277">to</wd>

<space/>

<wd l="3701" t="4133" r="4315" b="4320">identify</wd>

<space/>

<wd l="4368" t="4133" r="4762" b="4277">what</wd>

<space/>

<wd l="4814" t="4133" r="5501" b="4320">subtypes</wd>

<space/>

<wd l="5554" t="4133" r="5798" b="4277">be-</wd>

</ln>

<ln l="1445" t="4373" r="5813" b="4560" baseLine="4507" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="4373" r="1795" b="4560">long</wd>

<space/>

<wd l="1867" t="4392" r="2016" b="4517">to</wd>

<space/>

<wd l="2088" t="4373" r="2333" b="4517">the</wd>

<space/>

<wd l="2400" t="4392" r="2741" b="4560">type</wd>

<space/>

<wd l="2818" t="4373" r="3437" b="4517">“other”.</wd>

<space/>

<wd l="3566" t="4373" r="4051" b="4517">While</wd>

<space/>

<wd l="4123" t="4392" r="4512" b="4517">most</wd>

<space/>

<wd l="4584" t="4373" r="5323" b="4555">examples</wd>

<space/>

<wd l="5410" t="4416" r="5813" b="4517">seem</wd>

<space/>

</ln>

<ln l="1445" t="4608" r="5803" b="4795" baseLine="4747" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="4627" r="1594" b="4752">to</wd>

<space/>

<wd l="1666" t="4608" r="1853" b="4752">be</wd>

<space/>

<wd l="1920" t="4608" r="2419" b="4790">public</wd>

<space/>

<wd l="2486" t="4608" r="3158" b="4795">holidays</wd>

<space/>

<wd l="3235" t="4608" r="3518" b="4752">and</wd>

<space/>

<wd l="3590" t="4627" r="4133" b="4776">events,</wd>

<space/>

<wd l="4210" t="4608" r="4450" b="4752">the</wd>

<space/>

<wd l="4522" t="4627" r="4862" b="4795">type</wd>

<space/>

<wd l="4939" t="4608" r="5251" b="4752">also</wd>

<space/>

<wd l="5333" t="4651" r="5803" b="4752">seems</wd>

<space/>

</ln>

<ln l="1445" t="4848" r="5798" b="5035" baseLine="4987" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="4867" r="1594" b="4992">to</wd>

<space/>

<wd l="1646" t="4848" r="1834" b="4992">be</wd>

<space/>

<wd l="1882" t="4848" r="2246" b="4992">used</wd>

<space/>

<wd l="2294" t="4848" r="2530" b="4992">for</wd>

<space/>

<wd l="2582" t="4848" r="3518" b="5035">overlapping</wd>

<space/>

<wd l="3576" t="4891" r="3734" b="4992">or</wd>

<space/>

<wd l="3787" t="4848" r="4555" b="4992">otherwise</wd>

<space/>

<wd l="4608" t="4848" r="5194" b="4992">unclear</wd>

<space/>

<wd l="5242" t="4848" r="5798" b="4992">tokens.</wd>

<space/>

</ln>

<ln l="1445" t="5083" r="5798" b="5275" baseLine="5222">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1445" t="5088" r="2222" b="5270">Examples</wd>

<space/>

<wd l="2275" t="5088" r="2510" b="5232">for</wd>

<space/>

<wd l="2554" t="5088" r="2837" b="5232">this</wd>

<space/>

<wd l="2894" t="5131" r="3130" b="5232">are</wd>

<space/>

</run>

<wd l="3182" t="5088" r="3739" b="5232"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">“</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">Radio</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

<wd l="3802" t="5088" r="3974" b="5227"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">1</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">”</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="4037" t="5088" r="4186" b="5266">(a</wd>

<space/>

<wd l="4229" t="5088" r="5242" b="5275">broadcasting</wd>

<space/>

<wd l="5299" t="5131" r="5798" b="5275">organ-</wd>

</run>

</ln>

<ln l="1445" t="5328" r="5803" b="5515" baseLine="5462">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1445" t="5328" r="2083" b="5506">isation),</wd>

<space/>

</run>

<wd l="2146" t="5328" r="2938" b="5472"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">“</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">UMASS</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">”</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="3005" t="5328" r="3154" b="5506">(a</wd>

<space/>

<wd l="3197" t="5328" r="4094" b="5515">university),</wd>

<space/>

</run>

<wd l="4152" t="5328" r="5054" b="5472"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">“</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">Edmonton</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

<wd l="5102" t="5328" r="5803" b="5472"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">Journal</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">”</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

</ln>

<ln l="1454" t="5568" r="5813" b="5755" baseLine="5702">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1454" t="5568" r="1598" b="5746">(a</wd>

<space/>

<wd l="1685" t="5568" r="2698" b="5755">broadcasting</wd>

<space/>

<wd l="2789" t="5568" r="3874" b="5755">organisation),</wd>

<space/>

</run>

<wd l="3979" t="5568" r="4598" b="5712"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">“</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">Dems</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">”</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="4699" t="5568" r="5621" b="5746">(democrats,</wd>

<space/>

<wd l="5726" t="5611" r="5813" b="5712">a</wd>

<space/>

</run>

</ln>

<ln l="1450" t="5808" r="5803" b="5995" baseLine="5942" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="5851" r="1906" b="5995">group</wd>

<space/>

<wd l="1968" t="5808" r="2136" b="5952">of</wd>

<space/>

<wd l="2179" t="5808" r="2707" b="5990">people</wd>

<space/>

<wd l="2770" t="5851" r="2933" b="5952">or</wd>

<space/>

<wd l="2990" t="5851" r="3077" b="5952">a</wd>

<space/>

<wd l="3125" t="5808" r="3811" b="5990">political</wd>

<space/>

<wd l="3864" t="5808" r="4378" b="5995">party).</wd>

<space/>

<wd l="4464" t="5808" r="4771" b="5952">The</wd>

<space/>

<wd l="4824" t="5827" r="5165" b="5995">type</wd>

<space/>

<wd l="5227" t="5808" r="5803" b="5952">“other”</wd>

<space/>

</ln>

<ln l="1445" t="6043" r="5798" b="6230" baseLine="6182" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="6043" r="1570" b="6187">is</wd>

<space/>

<wd l="1627" t="6043" r="1939" b="6187">also</wd>

<space/>

<wd l="1992" t="6086" r="2275" b="6187">one</wd>

<space/>

<wd l="2323" t="6043" r="2554" b="6187">for</wd>

<space/>

<wd l="2602" t="6043" r="3091" b="6187">which</wd>

<space/>

<wd l="3139" t="6043" r="3974" b="6187">annotation</wd>

<space/>

<wd l="4027" t="6043" r="4834" b="6230">guidelines</wd>

<space/>

<wd l="4891" t="6043" r="5323" b="6187">differ</wd>

<space/>

<wd l="5371" t="6043" r="5798" b="6187">heav-</wd>

</ln>

<ln l="1445" t="6283" r="5803" b="6470" baseLine="6422" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="6283" r="1651" b="6470">ily</wd>

<space/>

<wd l="1709" t="6370" r="1810" b="6384">–</wd>

<space/>

<wd l="1867" t="6283" r="2549" b="6470">meaning</wd>

<space/>

<wd l="2611" t="6283" r="3614" b="6466">performance</wd>

<space/>

<wd l="3682" t="6283" r="4032" b="6427">does</wd>

<space/>

<wd l="4099" t="6302" r="4354" b="6427">not</wd>

<space/>

<wd l="4406" t="6283" r="5059" b="6427">increase</wd>

<space/>

<wd l="5117" t="6283" r="5251" b="6422">if</wd>

<space/>

<wd l="5299" t="6326" r="5530" b="6427">we</wd>

<space/>

<wd l="5587" t="6302" r="5803" b="6470">try</wd>

<space/>

</ln>

<ln l="1445" t="6523" r="5798" b="6710" baseLine="6658" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="6542" r="1594" b="6667">to</wd>

<space/>

<wd l="1666" t="6542" r="2429" b="6710">aggregate</wd>

<space/>

<wd l="2491" t="6523" r="2731" b="6667">the</wd>

<space/>

<wd l="2798" t="6523" r="3149" b="6710">gold</wd>

<space/>

<wd l="3221" t="6523" r="3888" b="6667">standard</wd>

<space/>

<wd l="3950" t="6566" r="4474" b="6706">corpus</wd>

<space/>

<wd l="4541" t="6523" r="4896" b="6667">with</wd>

<space/>

<wd l="4958" t="6566" r="5304" b="6667">over</wd>

<space/>

<wd l="5366" t="6523" r="5798" b="6667">avail-</wd>

</ln>

<ln l="1450" t="6763" r="4090" b="6950" baseLine="6898" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="6763" r="1771" b="6907">able</wd>

<space/>

<wd l="1824" t="6763" r="2395" b="6907">Twitter</wd>

<space/>

<wd l="2443" t="6768" r="2846" b="6907">NER</wd>

<space/>

<wd l="2899" t="6763" r="3250" b="6950">gold</wd>

<space/>

<wd l="3307" t="6763" r="4090" b="6907">standards.</wd>

</ln>

</para>

<para l="1445" t="7142" r="5818" b="11395" alignment="justified" ri="216" spaceBefore="142" lsp="exactly" lspExact="239" language="en">

<ln l="1445" t="7142" r="5808" b="7325" baseLine="7277">

<run bold="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1445" t="7142" r="2472" b="7286">Inconsistent</wd>

<space/>

<wd l="2602" t="7142" r="3514" b="7286">annotation</wd>

<space/>

<wd l="3638" t="7142" r="3888" b="7286">for</wd>

<space/>

<wd l="4013" t="7142" r="4810" b="7325">hashtags:</wd>

<space/>

</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="5021" t="7147" r="5808" b="7325">Important</wd>

<space/>

</run>

</ln>

<ln l="1445" t="7382" r="5803" b="7570" baseLine="7517" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="7382" r="1920" b="7526">words</wd>

<space/>

<wd l="2016" t="7382" r="2174" b="7522">in</wd>

<space/>

<wd l="2261" t="7402" r="2765" b="7526">tweets</wd>

<space/>

<wd l="2861" t="7426" r="3096" b="7526">are</wd>

<space/>

<wd l="3192" t="7382" r="3600" b="7526">often</wd>

<space/>

<wd l="3686" t="7382" r="4406" b="7565">preceded</wd>

<space/>

<wd l="4493" t="7382" r="4690" b="7570">by</wd>

<space/>

<wd l="4786" t="7426" r="4872" b="7526">a</wd>

<space/>

<wd l="4958" t="7382" r="5563" b="7570">hashtag</wd>

<space/>

<wd l="5654" t="7402" r="5803" b="7526">to</wd>

<space/>

</ln>

<ln l="1450" t="7618" r="5808" b="7805" baseLine="7757">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1450" t="7618" r="2270" b="7800">emphasise</wd>

<space/>

<wd l="2347" t="7618" r="2789" b="7786">them,</wd>

<space/>

<wd l="2880" t="7661" r="3154" b="7805">e.g.</wd>

<space/>

</run>

<wd l="3302" t="7618" r="4742" b="7800"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">“</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">#JenniferAniston</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="4824" t="7646" r="5261" b="7800">quote</wd>

<space/>

<wd l="5342" t="7618" r="5520" b="7800">of</wd>

<space/>

<wd l="5568" t="7618" r="5808" b="7762">the</wd>

<space/>

</run>

</ln>

<ln l="1445" t="7858" r="5803" b="8045" baseLine="7997">

<wd l="1445" t="7858" r="1858" b="8040"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">day</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">”.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="2030" t="7858" r="2635" b="8040">Despite</wd>

<space/>

<wd l="2722" t="7858" r="2962" b="8002">the</wd>

<space/>

<wd l="3048" t="7858" r="3346" b="8002">fact</wd>

<space/>

<wd l="3427" t="7858" r="3730" b="8002">that</wd>

<space/>

<wd l="3811" t="7901" r="4248" b="8045">many</wd>

<space/>

<wd l="4339" t="7858" r="4507" b="8002">of</wd>

<space/>

<wd l="4584" t="7858" r="4824" b="8002">the</wd>

<space/>

<wd l="4915" t="7858" r="5198" b="8002">327</wd>

<space/>

<wd l="5290" t="7858" r="5803" b="8002">tokens</wd>

<space/>

</run>

</ln>

<ln l="1454" t="8098" r="5818" b="8285" baseLine="8232" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1454" t="8098" r="2035" b="8285">starting</wd>

<space/>

<wd l="2112" t="8098" r="2467" b="8242">with</wd>

<space/>

<wd l="2534" t="8098" r="3216" b="8285">hashtags</wd>

<space/>

<wd l="3293" t="8141" r="3677" b="8242">were</wd>

<space/>

<wd l="3749" t="8098" r="4282" b="8242">named</wd>

<space/>

<wd l="4354" t="8098" r="4968" b="8266">entities,</wd>

<space/>

<wd l="5054" t="8098" r="5400" b="8285">only</wd>

<space/>

<wd l="5477" t="8098" r="5563" b="8242">5</wd>

<space/>

<wd l="5645" t="8098" r="5818" b="8242">of</wd>

<space/>

</ln>

<ln l="1445" t="8338" r="5803" b="8525" baseLine="8472" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="8338" r="1843" b="8482">them</wd>

<space/>

<wd l="1939" t="8381" r="2174" b="8482">are</wd>

<space/>

<wd l="2270" t="8338" r="3043" b="8482">annotated</wd>

<space/>

<wd l="3134" t="8338" r="3490" b="8482">with</wd>

<space/>

<wd l="3581" t="8342" r="3845" b="8482">NE</wd>

<space/>

<wd l="3936" t="8357" r="4349" b="8525">types</wd>

<space/>

<wd l="4454" t="8338" r="4997" b="8515">(#Vh1:</wd>

<space/>

<wd l="5160" t="8338" r="5803" b="8506">B-other,</wd>

<space/>

</ln>

<ln l="1445" t="8578" r="5794" b="8765" baseLine="8712" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="8582" r="2102" b="8722">#Astros:</wd>

<space/>

<wd l="2208" t="8582" r="3317" b="8760">B-sportsteam,</wd>

<space/>

<wd l="3389" t="8582" r="4104" b="8722">#Denver:</wd>

<space/>

<wd l="4210" t="8578" r="5050" b="8765">B-geo-loc,</wd>

<space/>

<wd l="5126" t="8578" r="5794" b="8722">#Padres:</wd>

<space/>

</ln>

<ln l="1445" t="8813" r="5803" b="9000" baseLine="8952" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="8818" r="2549" b="8995">B-sportsteam,</wd>

<space/>

<wd l="2611" t="8813" r="3216" b="8957">#BB11:</wd>

<space/>

<wd l="3307" t="8813" r="4181" b="8990">B-tvshow).</wd>

<space/>

<wd l="4272" t="8813" r="4579" b="8957">The</wd>

<space/>

<wd l="4637" t="8813" r="5011" b="8957">false</wd>

<space/>

<wd l="5069" t="8813" r="5803" b="9000">negatives</wd>

<space/>

</ln>

<ln l="1445" t="9053" r="5803" b="9240" baseLine="9192" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="9053" r="1982" b="9240">belong</wd>

<space/>

<wd l="2064" t="9072" r="2218" b="9197">to</wd>

<space/>

<wd l="2304" t="9053" r="2981" b="9197">different</wd>

<space/>

<wd l="3062" t="9058" r="3326" b="9197">NE</wd>

<space/>

<wd l="3408" t="9072" r="3821" b="9240">types</wd>

<space/>

<wd l="3912" t="9053" r="4195" b="9197">and</wd>

<space/>

<wd l="4277" t="9096" r="4517" b="9197">are</wd>

<space/>

<wd l="4598" t="9053" r="5136" b="9240">mostly</wd>

<space/>

<wd l="5222" t="9096" r="5568" b="9240">easy</wd>

<space/>

<wd l="5654" t="9072" r="5803" b="9197">to</wd>

<space/>

</ln>

<ln l="1454" t="9293" r="5808" b="9480" baseLine="9427" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1454" t="9312" r="1776" b="9475">spot</wd>

<space/>

<wd l="1862" t="9293" r="2198" b="9480">(e.g.</wd>

<space/>

<wd l="2357" t="9293" r="3576" b="9461">#BROOKLYN,</wd>

<space/>

<wd l="3662" t="9293" r="4886" b="9480">#lindsaylohan).</wd>

<space/>

<wd l="5045" t="9298" r="5189" b="9432">A</wd>

<space/>

<wd l="5266" t="9293" r="5808" b="9437">related</wd>

<space/>

</ln>

<ln l="1445" t="9533" r="5808" b="9715" baseLine="9667" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="9533" r="2112" b="9715">problem</wd>

<space/>

<wd l="2155" t="9533" r="2285" b="9677">is</wd>

<space/>

<wd l="2338" t="9533" r="2582" b="9677">the</wd>

<space/>

<wd l="2635" t="9533" r="3475" b="9677">annotation</wd>

<space/>

<wd l="3528" t="9533" r="3701" b="9677">of</wd>

<space/>

<wd l="3744" t="9533" r="4195" b="9677">direct</wd>

<space/>

<wd l="4243" t="9533" r="4968" b="9677">mentions</wd>

<space/>

<wd l="5026" t="9533" r="5198" b="9677">of</wd>

<space/>

<wd l="5237" t="9533" r="5808" b="9677">Twitter</wd>

<space/>

</ln>

<ln l="1450" t="9773" r="5789" b="9960" baseLine="9907">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1450" t="9792" r="2136" b="9917">accounts</wd>

<space/>

<wd l="2208" t="9773" r="2563" b="9917">with</wd>

<space/>

<wd l="2650" t="9773" r="2794" b="9917">@</wd>

<space/>

<wd l="2885" t="9773" r="3038" b="9912">in</wd>

<space/>

<wd l="3115" t="9792" r="3912" b="9941">sentences,</wd>

<space/>

<wd l="3998" t="9816" r="4229" b="9960">e.g</wd>

<space/>

<wd l="4301" t="9773" r="4454" b="9912">in</wd>

<space/>

</run>

<wd l="4526" t="9773" r="4882" b="9917"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">“</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">All</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">:</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="5002" t="9773" r="5136" b="9917">O</wd>

<space/>

</run>

<wd l="5218" t="9773" r="5789" b="9955"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">caught</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">:</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

</run>

</ln>

<ln l="1450" t="10013" r="5798" b="10195" baseLine="10147">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1450" t="10013" r="1584" b="10157">O</wd>

<space/>

</run>

<wd l="1651" t="10056" r="1886" b="10195"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">up</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">:</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="1987" t="10013" r="2122" b="10157">O</wd>

<space/>

</run>

<wd l="2189" t="10013" r="2568" b="10157"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">with</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">:</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="2674" t="10013" r="2808" b="10157">O</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="2894" t="10018" r="3442" b="10157">@SHO</wd>

<space/>

</run>

<wd l="3514" t="10013" r="4042" b="10157"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">weeds</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">:</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="4147" t="10013" r="4282" b="10157">O</wd>

<space/>

</run>

<wd l="4349" t="10018" r="4450" b="10157"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">!</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">:</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="4555" t="10013" r="4819" b="10157">O”.</wd>

<space/>

<wd l="4891" t="10018" r="5059" b="10152">In</wd>

<space/>

<wd l="5117" t="10013" r="5419" b="10157">that</wd>

<space/>

<wd l="5486" t="10056" r="5798" b="10157">sen-</wd>

</run>

</ln>

<ln l="1445" t="10248" r="5798" b="10416" baseLine="10387">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1445" t="10267" r="1906" b="10416">tence,</wd>

<space/>

</run>

<wd l="1982" t="10248" r="2635" b="10392"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">“</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">@SHO</run>

</wd>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

</run>

<wd l="2707" t="10248" r="3278" b="10392"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">weeds</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">”</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="3346" t="10248" r="3792" b="10392">refers</wd>

<space/>

<wd l="3859" t="10267" r="4013" b="10392">to</wd>

<space/>

<wd l="4080" t="10248" r="4320" b="10392">the</wd>

<space/>

<wd l="4387" t="10248" r="5184" b="10392">Showtime</wd>

<space/>

<wd l="5246" t="10253" r="5510" b="10392">TV</wd>

<space/>

<wd l="5587" t="10291" r="5798" b="10392">se-</wd>

</run>

</ln>

<ln l="1445" t="10488" r="5798" b="10632" baseLine="10627">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1445" t="10488" r="1723" b="10632">ries</wd>

<space/>

</run>

<wd l="1786" t="10488" r="2453" b="10632"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">“</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">Weeds</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">”</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="2510" t="10488" r="2794" b="10632">and</wd>

<space/>

<wd l="2851" t="10488" r="3374" b="10632">should</wd>

<space/>

<wd l="3422" t="10488" r="3610" b="10632">be</wd>

<space/>

<wd l="3662" t="10488" r="4435" b="10632">annotated</wd>

<space/>

<wd l="4483" t="10531" r="4642" b="10632">as</wd>

<space/>

</run>

<wd l="4699" t="10488" r="5285" b="10632"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">tvshow</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="5362" t="10493" r="5798" b="10632">How-</wd>

</run>

</ln>

<ln l="1450" t="10728" r="5803" b="10915" baseLine="10862" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1450" t="10771" r="1814" b="10896">ever,</wd>

<space/>

<wd l="1886" t="10728" r="2078" b="10872">all</wd>

<space/>

<wd l="2136" t="10728" r="2650" b="10872">tokens</wd>

<space/>

<wd l="2722" t="10728" r="3307" b="10915">starting</wd>

<space/>

<wd l="3370" t="10728" r="3725" b="10872">with</wd>

<space/>

<wd l="3802" t="10728" r="3946" b="10872">@</wd>

<space/>

<wd l="4027" t="10771" r="4262" b="10872">are</wd>

<space/>

<wd l="4330" t="10728" r="5098" b="10872">annotated</wd>

<space/>

<wd l="5160" t="10771" r="5318" b="10872">as</wd>

<space/>

<wd l="5386" t="10728" r="5568" b="10896">O,</wd>

<space/>

<wd l="5640" t="10771" r="5803" b="10872">so</wd>

<space/>

</ln>

<ln l="1450" t="10968" r="5798" b="11155" baseLine="11102" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1450" t="11011" r="1814" b="11112">even</wd>

<space/>

<wd l="1877" t="10968" r="2434" b="11155">though</wd>

<space/>

<wd l="2496" t="10968" r="2779" b="11112">this</wd>

<space/>

<wd l="2851" t="10968" r="2976" b="11112">is</wd>

<space/>

<wd l="3048" t="10987" r="3302" b="11112">not</wd>

<space/>

<wd l="3370" t="10968" r="4339" b="11155">neccessarily</wd>

<space/>

<wd l="4411" t="10987" r="5002" b="11136">correct,</wd>

<space/>

<wd l="5078" t="10968" r="5194" b="11112">it</wd>

<space/>

<wd l="5256" t="10968" r="5381" b="11112">is</wd>

<space/>

<wd l="5458" t="11011" r="5798" b="11112">con-</wd>

</ln>

<ln l="1454" t="11208" r="3970" b="11395" baseLine="11342" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1454" t="11208" r="1954" b="11352">sistent</wd>

<space/>

<wd l="2002" t="11208" r="2510" b="11352">within</wd>

<space/>

<wd l="2563" t="11208" r="2803" b="11352">the</wd>

<space/>

<wd l="2861" t="11208" r="3211" b="11395">gold</wd>

<space/>

<wd l="3269" t="11208" r="3970" b="11352">standard.</wd>

</ln>

</para>

<para l="1450" t="11640" r="2942" b="11813" alignment="left" spaceBefore="202" lsp="exactly" lspExact="274" language="en">

<ln l="1450" t="11640" r="2942" b="11813" baseLine="11808" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="12">

<wd l="1450" t="11640" r="1560" b="11813">6</wd>

<space/>

<wd l="1810" t="11640" r="2942" b="11813">Conclusion</wd>

</ln>

</para>

<para l="1445" t="12048" r="5808" b="13426" alignment="justified" ri="216" spaceBefore="126" lsp="exactly" lspExact="239" language="en">

<ln l="1445" t="12048" r="5808" b="12235" baseLine="12182" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="12048" r="1790" b="12192">This</wd>

<space/>

<wd l="1891" t="12091" r="2333" b="12230">paper</wd>

<space/>

<wd l="2424" t="12048" r="2683" b="12192">has</wd>

<space/>

<wd l="2789" t="12048" r="3547" b="12192">described</wd>

<space/>

<wd l="3638" t="12048" r="3878" b="12192">the</wd>

<space/>

<wd l="3974" t="12048" r="4478" b="12192">USFD</wd>

<space/>

<wd l="4584" t="12067" r="5131" b="12235">system</wd>

<space/>

<wd l="5227" t="12048" r="5808" b="12192">entered</wd>

<space/>

</ln>

<ln l="1445" t="12288" r="5808" b="12475" baseLine="12422" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="12288" r="1598" b="12427">in</wd>

<space/>

<wd l="1694" t="12293" r="2342" b="12432">W-NUT</wd>

<space/>

<wd l="2448" t="12288" r="2880" b="12432">2015.</wd>

<space/>

<wd l="3091" t="12293" r="3216" b="12432">It</wd>

<space/>

<wd l="3317" t="12288" r="3984" b="12432">achieves</wd>

<space/>

<wd l="4085" t="12288" r="5088" b="12470">performance</wd>

<space/>

<wd l="5189" t="12288" r="5808" b="12475">through</wd>

<space/>

</ln>

<ln l="1445" t="12523" r="5808" b="12710" baseLine="12662" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="12523" r="2496" b="12706">unsupervised</wd>

<space/>

<wd l="2626" t="12523" r="3178" b="12667">feature</wd>

<space/>

<wd l="3312" t="12523" r="4190" b="12710">generation,</wd>

<space/>

<wd l="4349" t="12523" r="4973" b="12710">through</wd>

<space/>

<wd l="5102" t="12523" r="5808" b="12667">Freebase</wd>

<space/>

</ln>

<ln l="1450" t="12763" r="5803" b="12950" baseLine="12902" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1450" t="12782" r="2280" b="12950">gazetteers,</wd>

<space/>

<wd l="2342" t="12763" r="2626" b="12907">and</wd>

<space/>

<wd l="2674" t="12763" r="3293" b="12950">through</wd>

<space/>

<wd l="3341" t="12763" r="4133" b="12950">weighting</wd>

<space/>

<wd l="4186" t="12763" r="4598" b="12946">input</wd>

<space/>

<wd l="4646" t="12763" r="4978" b="12907">data</wd>

<space/>

<wd l="5026" t="12763" r="5803" b="12950">according</wd>

<space/>

</ln>

<ln l="1445" t="13003" r="5808" b="13190" baseLine="13138" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="13022" r="1594" b="13147">to</wd>

<space/>

<wd l="1646" t="13003" r="1829" b="13147">its</wd>

<space/>

<wd l="1891" t="13003" r="2362" b="13190">origin</wd>

<space/>

<wd l="2414" t="13003" r="2741" b="13147">date</wd>

<space/>

<wd l="2789" t="13003" r="2947" b="13142">in</wd>

<space/>

<wd l="3000" t="13003" r="3418" b="13147">order</wd>

<space/>

<wd l="3466" t="13022" r="3614" b="13147">to</wd>

<space/>

<wd l="3672" t="13022" r="4291" b="13147">account</wd>

<space/>

<wd l="4339" t="13003" r="4570" b="13147">for</wd>

<space/>

<wd l="4622" t="13003" r="5002" b="13147">drift.</wd>

<space/>

<wd l="5074" t="13003" r="5419" b="13147">This</wd>

<space/>

<wd l="5477" t="13003" r="5808" b="13147">lead</wd>

<space/>

</ln>

<ln l="1445" t="13243" r="4997" b="13426" baseLine="13378" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="13262" r="1594" b="13387">to</wd>

<space/>

<wd l="1656" t="13243" r="2832" b="13387">state-of-the-art</wd>

<space/>

<wd l="2880" t="13243" r="3451" b="13387">Twitter</wd>

<space/>

<wd l="3504" t="13248" r="3902" b="13387">NER</wd>

<space/>

<wd l="3950" t="13243" r="4997" b="13426">performance.</wd>

</ln>

</para>

<para l="1445" t="13675" r="3307" b="13896" alignment="left" spaceBefore="201" lsp="exactly" lspExact="274" language="en">

<ln l="1445" t="13675" r="3307" b="13896" baseLine="13843" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="13675" r="3307" b="13896">Acknowledgments</wd>

</ln>

</para>

<para l="1445" t="14083" r="5803" b="14750" alignment="justified" ri="216" spaceBefore="159" spaceAfter="78" lsp="exactly" lspExact="239" language="en">

<ln l="1445" t="14083" r="5803" b="14270" baseLine="14218" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">

<wd l="1445" t="14083" r="1790" b="14227">This</wd>

<space/>

<wd l="1886" t="14083" r="2299" b="14227">work</wd>

<space/>

<wd l="2386" t="14126" r="2688" b="14227">was</wd>

<space/>

<wd l="2789" t="14083" r="3446" b="14270">partially</wd>

<space/>

<wd l="3552" t="14083" r="4330" b="14266">supported</wd>

<space/>

<wd l="4421" t="14083" r="4613" b="14270">by</wd>

<space/>

<wd l="4709" t="14083" r="4949" b="14227">the</wd>

<space/>

<wd l="5045" t="14088" r="5803" b="14266">European</wd>

<space/>

</ln>

<ln l="1445" t="14294" r="5794" b="14510" baseLine="14454">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="1445" t="14323" r="1944" b="14467">Union</wd>

<space/>

<wd l="2021" t="14323" r="2472" b="14467">under</wd>

<space/>

<wd l="2558" t="14342" r="2962" b="14510">grant</wd>

<space/>

<wd l="3043" t="14342" r="3874" b="14510">agreement</wd>

<space/>

<wd l="3950" t="14328" r="4229" b="14467">No.</wd>

<space/>

<wd l="4397" t="14323" r="4973" b="14467">611233</wd>

<space/>

</run>

<wd l="5074" t="14294" r="5794" b="14491"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">P</run>

<run underlined="none" subsuperscript="none" fontSize="800" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">HEME</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">,</run>

<run underlined="none" subsuperscript="superscript" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">4</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><space/>

</run>

</ln>

<ln l="1450" t="14534" r="5789" b="14750" baseLine="14695">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><wd l="1450" t="14563" r="1733" b="14707">and</wd>

<space/>

<wd l="1776" t="14563" r="2016" b="14707">the</wd>

<space/>

<wd l="2064" t="14568" r="2352" b="14707">UK</wd>

<space/>

<wd l="2400" t="14563" r="3005" b="14707">EPSRC</wd>

<space/>

<wd l="3058" t="14582" r="3466" b="14750">grant</wd>

<space/>

<wd l="3509" t="14568" r="3792" b="14707">No.</wd>

<space/>

<wd l="3864" t="14563" r="5030" b="14707">EP/K017896/1</wd>

<space/>

</run>

<wd l="5093" t="14534" r="5789" b="14750"><run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">uComp.</run>

<run underlined="none" subsuperscript="superscript" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3">5</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="700" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-3"><nl orig="true"/>

</run>

</ln>

</para>

<rulerline l="1428" t="14875" r="2640" b="14875" type="single" width="10" color="000000"/>

<para l="1694" t="14928" r="3802" b="15341" alignment="left" li="216" ri="2160" spaceBefore="40" spaceAfter="9" lsp="exactly" lspExact="216" language="en">

<ln l="1694" t="14928" r="3802" b="15120" baseLine="15082">

<wd l="1694" t="14928" r="3802" b="15120"><run underlined="none" subsuperscript="superscript" fontSize="600" fontFace="Courier New" fontFamily="roman" fontPitch="variable" spacing="0">4</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Courier New" fontFamily="modern" fontPitch="fixed" spacing="0">http://www.pheme.eu</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Courier New" fontFamily="modern" fontPitch="fixed" spacing="0"><space/>

</run>

</ln>

<ln l="1694" t="15144" r="3802" b="15341" baseLine="15298">

<wd l="1694" t="15144" r="3802" b="15341"><run underlined="none" subsuperscript="superscript" fontSize="600" fontFace="Courier New" fontFamily="roman" fontPitch="variable" spacing="0">5</run>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Courier New" fontFamily="modern" fontPitch="fixed" spacing="0">http://www.ucomp.eu</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="900" fontFace="Courier New" fontFamily="modern" fontPitch="fixed" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

</column>

<column l="6118" t="1284" r="10543" b="15363">

<para l="6154" t="1334" r="7253" b="1507" alignment="left" spaceBefore="3" lsp="exactly" lspExact="274" language="en">

<ln l="6154" t="1334" r="7253" b="1507" baseLine="1498" bold="true" underlined="none" subsuperscript="none" fontSize="1200" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6154" t="1334" r="7253" b="1507">References</wd>

</ln>

</para>

<para l="6149" t="1718" r="10512" b="2563" alignment="justified" li="216" spaceBefore="127" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="6149" t="1718" r="10502" b="1906" baseLine="1853" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="1723" r="6254" b="1862">I.</wd>

<space/>

<wd l="6336" t="1718" r="7286" b="1906">Augenstein,</wd>

<space/>

<wd l="7373" t="1723" r="7555" b="1862">D.</wd>

<space/>

<wd l="7637" t="1718" r="8400" b="1906">Maynard,</wd>

<space/>

<wd l="8491" t="1718" r="8774" b="1862">and</wd>

<space/>

<wd l="8846" t="1723" r="8981" b="1862">F.</wd>

<space/>

<wd l="9067" t="1718" r="9912" b="1906">Ciravegna.</wd>

<space/>

<wd l="10070" t="1718" r="10502" b="1862">2014.</wd>

<space/>

</ln>

<ln l="6346" t="1934" r="10507" b="2122" baseLine="2074" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6346" t="1934" r="7022" b="2078">Relation</wd>

<space/>

<wd l="7066" t="1934" r="7858" b="2078">extraction</wd>

<space/>

<wd l="7896" t="1934" r="8285" b="2078">from</wd>

<space/>

<wd l="8323" t="1934" r="8563" b="2078">the</wd>

<space/>

<wd l="8606" t="1934" r="8933" b="2078">web</wd>

<space/>

<wd l="8976" t="1934" r="9403" b="2122">using</wd>

<space/>

<wd l="9451" t="1934" r="9984" b="2078">distant</wd>

<space/>

<wd l="10027" t="1978" r="10507" b="2117">super-</wd>

</ln>

<ln l="6346" t="2155" r="10512" b="2338" baseLine="2290">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6346" t="2155" r="6874" b="2299">vision.</wd>

<space/>

<wd l="7008" t="2160" r="7176" b="2294">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="7243" t="2155" r="8136" b="2338">Knowledge</wd>

<space/>

<wd l="8198" t="2160" r="9182" b="2338">Engineering</wd>

<space/>

<wd l="9254" t="2155" r="9557" b="2299">and</wd>

<space/>

<wd l="9619" t="2155" r="10512" b="2338">Knowledge</wd>

<space/>

</run>

</ln>

<ln l="6341" t="2376" r="9298" b="2563" baseLine="2510">

<wd l="6341" t="2381" r="7426" b="2558"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">Management</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="7483" t="2419" r="7930" b="2563">pages</wd>

<space/>

<wd l="7992" t="2376" r="8525" b="2520">26–41.</wd>

<space/>

<wd l="8592" t="2376" r="9298" b="2563">Springer.</wd>

</run>

</ln>

</para>

<para l="6149" t="2755" r="10522" b="4037" alignment="justified" li="216" spaceBefore="164" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="6149" t="2755" r="10507" b="2923" baseLine="2890" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="2760" r="6293" b="2899">T.</wd>

<space/>

<wd l="6355" t="2755" r="7070" b="2923">Baldwin,</wd>

<space/>

<wd l="7128" t="2760" r="7301" b="2899">B.</wd>

<space/>

<wd l="7358" t="2760" r="7733" b="2923">Han,</wd>

<space/>

<wd l="7790" t="2760" r="8006" b="2899">M.</wd>

<space/>

<wd l="8064" t="2760" r="8280" b="2899">M.</wd>

<space/>

<wd l="8347" t="2755" r="8510" b="2899">C.</wd>

<space/>

<wd l="8578" t="2755" r="8760" b="2899">de</wd>

<space/>

<wd l="8808" t="2755" r="9590" b="2923">Marneffe,</wd>

<space/>

<wd l="9643" t="2760" r="10051" b="2899">Y.-B.</wd>

<space/>

<wd l="10114" t="2755" r="10507" b="2923">Kim,</wd>

<space/>

</ln>

<ln l="6346" t="2976" r="10507" b="3163" baseLine="3110" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6346" t="2981" r="6528" b="3120">A.</wd>

<space/>

<wd l="6610" t="2976" r="7099" b="3144">Ritter,</wd>

<space/>

<wd l="7181" t="2976" r="7464" b="3120">and</wd>

<space/>

<wd l="7531" t="2981" r="7738" b="3120">W.</wd>

<space/>

<wd l="7814" t="2981" r="8098" b="3120">Xu.</wd>

<space/>

<wd l="8237" t="2976" r="8669" b="3120">2015.</wd>

<space/>

<wd l="8808" t="2976" r="9499" b="3163">Findings</wd>

<space/>

<wd l="9576" t="2976" r="9749" b="3120">of</wd>

<space/>

<wd l="9806" t="2976" r="10046" b="3120">the</wd>

<space/>

<wd l="10118" t="2976" r="10507" b="3120">2015</wd>

<space/>

</ln>

<ln l="6346" t="3192" r="10507" b="3379" baseLine="3331">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6346" t="3192" r="7157" b="3374">Workshop</wd>

<space/>

<wd l="7243" t="3235" r="7440" b="3336">on</wd>

<space/>

<wd l="7517" t="3192" r="7987" b="3379">Noisy</wd>

<space/>

<wd l="8069" t="3192" r="9283" b="3379">User-generated</wd>

<space/>

<wd l="9360" t="3197" r="9744" b="3336">Text.</wd>

<space/>

<wd l="9917" t="3197" r="10080" b="3331">In</wd>

<space/>

</run>

<wd l="10157" t="3197" r="10507" b="3336" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">Pro-</wd>

</ln>

<ln l="6350" t="3413" r="10522" b="3595" baseLine="3547" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6350" t="3413" r="7046" b="3595">ceedings</wd>

<space/>

<wd l="7114" t="3413" r="7296" b="3595">of</wd>

<space/>

<wd l="7334" t="3413" r="7570" b="3557">the</wd>

<space/>

<wd l="7646" t="3413" r="8424" b="3595">Workshop</wd>

<space/>

<wd l="8496" t="3461" r="8688" b="3557">on</wd>

<space/>

<wd l="8750" t="3418" r="9206" b="3595">Noisy</wd>

<space/>

<wd l="9293" t="3413" r="10522" b="3595">User-generated</wd>

<space/>

</ln>

<ln l="6355" t="3634" r="10512" b="3816" baseLine="3768">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6355" t="3638" r="6677" b="3778">Text</wd>

<space/>

<wd l="6744" t="3638" r="7378" b="3811">(WNUT</wd>

<space/>

</run>

<wd l="7430" t="3634" r="7934" b="3811"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">2015)</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="8016" t="3634" r="8957" b="3778">Association</wd>

<space/>

<wd l="9024" t="3634" r="9259" b="3778">for</wd>

<space/>

<wd l="9336" t="3634" r="10512" b="3816">Computational</wd>

<space/>

</run>

</ln>

<ln l="6346" t="3850" r="7272" b="4037" baseLine="3989" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6346" t="3850" r="7272" b="4037">Linguistics.</wd>

</ln>

</para>

<para l="6154" t="4234" r="10517" b="4421" alignment="justified" spaceBefore="146" lsp="exactly" lspExact="233" language="en">

<ln l="6154" t="4234" r="10517" b="4421" baseLine="4368" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="4">

<wd l="6154" t="4238" r="6331" b="4378">K.</wd>

<space/>

<wd l="6437" t="4234" r="7243" b="4402">Bollacker,</wd>

<space/>

<wd l="7358" t="4234" r="7526" b="4378">C.</wd>

<space/>

<wd l="7632" t="4238" r="8155" b="4402">Evans,</wd>

<space/>

<wd l="8266" t="4238" r="8395" b="4378">P.</wd>

<space/>

<wd l="8501" t="4234" r="9192" b="4402">Paritosh,</wd>

<space/>

<wd l="9302" t="4238" r="9451" b="4378">T.</wd>

<space/>

<wd l="9562" t="4234" r="10114" b="4421">Sturge,</wd>

<space/>

<wd l="10229" t="4234" r="10517" b="4378">and</wd>

</ln>

</para>

<para l="6346" t="4450" r="10512" b="5515" alignment="justified" li="216" spaceBefore="4" lsp="exactly" lspExact="218" language="en">

<ln l="6346" t="4450" r="10507" b="4637" baseLine="4589" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6346" t="4454" r="6466" b="4594">J.</wd>

<space/>

<wd l="6552" t="4450" r="7094" b="4637">Taylor.</wd>

<space/>

<wd l="7277" t="4450" r="7709" b="4594">2008.</wd>

<space/>

<wd l="7886" t="4450" r="8635" b="4594">Freebase:</wd>

<space/>

<wd l="8774" t="4493" r="8861" b="4594">a</wd>

<space/>

<wd l="8942" t="4450" r="10123" b="4637">collaboratively</wd>

<space/>

<wd l="10210" t="4493" r="10507" b="4594">cre-</wd>

</ln>

<ln l="6350" t="4670" r="10507" b="4858" baseLine="4805" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="4670" r="6682" b="4814">ated</wd>

<space/>

<wd l="6768" t="4670" r="7219" b="4858">graph</wd>

<space/>

<wd l="7306" t="4670" r="7982" b="4814">database</wd>

<space/>

<wd l="8069" t="4670" r="8299" b="4814">for</wd>

<space/>

<wd l="8390" t="4670" r="9245" b="4858">structuring</wd>

<space/>

<wd l="9331" t="4670" r="9874" b="4814">human</wd>

<space/>

<wd l="9955" t="4670" r="10507" b="4814">knowl-</wd>

</ln>

<ln l="6350" t="4891" r="10512" b="5078" baseLine="5026">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6350" t="4891" r="6763" b="5078">edge.</wd>

<space/>

<wd l="6931" t="4896" r="7094" b="5030">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="7171" t="4891" r="8160" b="5074">Proceedings</wd>

<space/>

<wd l="8242" t="4891" r="8419" b="5074">of</wd>

<space/>

<wd l="8472" t="4891" r="8707" b="5035">the</wd>

<space/>

<wd l="8784" t="4891" r="9182" b="5035">2008</wd>

<space/>

<wd l="9245" t="4896" r="9686" b="5035">ACM</wd>

<space/>

<wd l="9749" t="4896" r="10512" b="5035">SIGMOD</wd>

<space/>

</run>

</ln>

<ln l="6355" t="5107" r="10507" b="5290" baseLine="5246">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6355" t="5107" r="7392" b="5251">international</wd>

<space/>

<wd l="7488" t="5107" r="8347" b="5290">conference</wd>

<space/>

<wd l="8443" t="5155" r="8635" b="5251">on</wd>

<space/>

<wd l="8726" t="5112" r="9778" b="5290">Management</wd>

<space/>

<wd l="9869" t="5107" r="10046" b="5290">of</wd>

<space/>

</run>

<wd l="10109" t="5107" r="10507" b="5275"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">data</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

</ln>

<ln l="6346" t="5328" r="8333" b="5515" baseLine="5462" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6346" t="5371" r="6797" b="5515">pages</wd>

<space/>

<wd l="6869" t="5328" r="7786" b="5472">1247–1250.</wd>

<space/>

<wd l="7848" t="5328" r="8333" b="5472">ACM.</wd>

</ln>

</para>

<para l="6154" t="5707" r="10517" b="7210" alignment="justified" li="216" spaceBefore="169" fli="-216" lsp="exactly" lspExact="218" language="en">

<ln l="6154" t="5707" r="10507" b="5894" baseLine="5846" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6154" t="5712" r="6331" b="5851">K.</wd>

<space/>

<wd l="6408" t="5707" r="7291" b="5875">Bontcheva,</wd>

<space/>

<wd l="7368" t="5712" r="7526" b="5851">L.</wd>

<space/>

<wd l="7603" t="5707" r="8554" b="5894">Derczynski,</wd>

<space/>

<wd l="8626" t="5712" r="8808" b="5851">A.</wd>

<space/>

<wd l="8885" t="5707" r="9336" b="5875">Funk,</wd>

<space/>

<wd l="9413" t="5712" r="9629" b="5851">M.</wd>

<space/>

<wd l="9701" t="5712" r="9883" b="5851">A.</wd>

<space/>

<wd l="9965" t="5707" r="10507" b="5851">Green-</wd>

</ln>

<ln l="6346" t="5928" r="10498" b="6115" baseLine="6062" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6346" t="5928" r="6830" b="6096">wood,</wd>

<space/>

<wd l="6902" t="5933" r="7085" b="6072">D.</wd>

<space/>

<wd l="7157" t="5928" r="7920" b="6115">Maynard,</wd>

<space/>

<wd l="7992" t="5928" r="8275" b="6072">and</wd>

<space/>

<wd l="8338" t="5933" r="8520" b="6072">N.</wd>

<space/>

<wd l="8592" t="5928" r="9235" b="6072">Aswani.</wd>

<space/>

<wd l="9360" t="5928" r="9792" b="6072">2013.</wd>

<space/>

<wd l="9912" t="5928" r="10498" b="6072">TwitIE:</wd>

<space/>

</ln>

<ln l="6346" t="6144" r="10517" b="6326" baseLine="6283" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6346" t="6149" r="6590" b="6283">An</wd>

<space/>

<wd l="6638" t="6144" r="7680" b="6326">Open-Source</wd>

<space/>

<wd l="7723" t="6144" r="8678" b="6288">Information</wd>

<space/>

<wd l="8717" t="6144" r="9547" b="6288">Extraction</wd>

<space/>

<wd l="9586" t="6144" r="10238" b="6326">Pipeline</wd>

<space/>

<wd l="10282" t="6144" r="10517" b="6288">for</wd>

<space/>

</ln>

<ln l="6346" t="6365" r="10517" b="6552" baseLine="6499">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="6346" t="6365" r="7186" b="6552">Microblog</wd>

<space/>

<wd l="7238" t="6370" r="7627" b="6509">Text.</wd>

<space/>

<wd l="7709" t="6370" r="7877" b="6504">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="7925" t="6365" r="8914" b="6547">Proceedings</wd>

<space/>

<wd l="8966" t="6365" r="9149" b="6547">of</wd>

<space/>

<wd l="9173" t="6365" r="9408" b="6509">the</wd>

<space/>

<wd l="9456" t="6365" r="10517" b="6509">International</wd>

<space/>

</run>

</ln>

<ln l="6355" t="6586" r="10507" b="6768" baseLine="6720" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6355" t="6586" r="7258" b="6768">Conference</wd>

<space/>

<wd l="7363" t="6634" r="7555" b="6730">on</wd>

<space/>

<wd l="7656" t="6590" r="8208" b="6730">Recent</wd>

<space/>

<wd l="8285" t="6586" r="9062" b="6730">Advances</wd>

<space/>

<wd l="9173" t="6590" r="9317" b="6730">in</wd>

<space/>

<wd l="9418" t="6586" r="10042" b="6730">Natural</wd>

<space/>

<wd l="10133" t="6590" r="10507" b="6730">Lan-</wd>

</ln>

<ln l="6346" t="6802" r="10512" b="6984" baseLine="6941">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="6346" t="6850" r="6830" b="6984">guage</wd>

<space/>

</run>

<wd l="6931" t="6806" r="7843" b="6984"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">Processing</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="7958" t="6802" r="8899" b="6946">Association</wd>

<space/>

<wd l="8995" t="6802" r="9230" b="6946">for</wd>

<space/>

<wd l="9336" t="6802" r="10512" b="6984">Computational</wd>

<space/>

</run>

</ln>

<ln l="6346" t="7022" r="7272" b="7210" baseLine="7157" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="6346" t="7022" r="7272" b="7210">Linguistics.</wd>

</ln>

</para>

<para l="6149" t="7402" r="10517" b="8203" alignment="justified" li="216" spaceBefore="165" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="6149" t="7402" r="10517" b="7570" baseLine="7541" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6149" t="7406" r="6274" b="7546">P.</wd>

<space/>

<wd l="6374" t="7406" r="6955" b="7570">Brown,</wd>

<space/>

<wd l="7061" t="7406" r="7219" b="7546">V.</wd>

<space/>

<wd l="7320" t="7402" r="7752" b="7546">Della</wd>

<space/>

<wd l="7838" t="7402" r="8347" b="7570">Pietra,</wd>

<space/>

<wd l="8453" t="7406" r="8582" b="7546">P.</wd>

<space/>

<wd l="8688" t="7402" r="8866" b="7546">de</wd>

<space/>

<wd l="8962" t="7402" r="9490" b="7570">Souza,</wd>

<space/>

<wd l="9595" t="7406" r="9710" b="7546">J.</wd>

<space/>

<wd l="9811" t="7402" r="10118" b="7570">Lai,</wd>

<space/>

<wd l="10229" t="7402" r="10517" b="7546">and</wd>

<space/>

</ln>

<ln l="6346" t="7622" r="10507" b="7810" baseLine="7757" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6346" t="7627" r="6518" b="7766">R.</wd>

<space/>

<wd l="6571" t="7627" r="7171" b="7766">Mercer.</wd>

<space/>

<wd l="7277" t="7622" r="7694" b="7766">1992.</wd>

<space/>

<wd l="7786" t="7622" r="8731" b="7766">Class-based</wd>

<space/>

<wd l="8770" t="7666" r="9350" b="7810">n-gram</wd>

<space/>

<wd l="9384" t="7622" r="9955" b="7766">models</wd>

<space/>

<wd l="10003" t="7622" r="10176" b="7766">of</wd>

<space/>

<wd l="10205" t="7642" r="10507" b="7766">nat-</wd>

</ln>

<ln l="6346" t="7843" r="10517" b="8030" baseLine="7978">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="6346" t="7843" r="6653" b="7987">ural</wd>

<space/>

<wd l="6720" t="7843" r="7478" b="8030">language.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="7618" t="7843" r="8808" b="8026">Computational</wd>

<space/>

</run>

<wd l="8866" t="7848" r="9787" b="8026"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">Linguistics</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><space/>

<wd l="9878" t="7843" r="10517" b="7987">18:467–</wd>

<space/>

</run>

</ln>

<ln l="6346" t="8059" r="6686" b="8203" baseLine="8198" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="6346" t="8059" r="6686" b="8203">479.</wd>

</ln>

</para>

<para l="6149" t="8443" r="10507" b="9245" alignment="justified" li="216" spaceBefore="161" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="6149" t="8443" r="10507" b="8630" baseLine="8578" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="8448" r="6365" b="8587">M.</wd>

<space/>

<wd l="6432" t="8448" r="7104" b="8611">Brucato,</wd>

<space/>

<wd l="7166" t="8448" r="7330" b="8587">L.</wd>

<space/>

<wd l="7397" t="8443" r="8347" b="8630">Derczynski,</wd>

<space/>

<wd l="8410" t="8448" r="8592" b="8587">H.</wd>

<space/>

<wd l="8659" t="8443" r="9307" b="8611">Llorens,</wd>

<space/>

<wd l="9379" t="8448" r="9557" b="8587">K.</wd>

<space/>

<wd l="9624" t="8443" r="10507" b="8611">Bontcheva,</wd>

<space/>

</ln>

<ln l="6350" t="8659" r="10507" b="8846" baseLine="8798" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="8659" r="6638" b="8803">and</wd>

<space/>

<wd l="6696" t="8659" r="6864" b="8803">C.</wd>

<space/>

<wd l="6936" t="8659" r="7080" b="8803">S.</wd>

<space/>

<wd l="7152" t="8664" r="7718" b="8803">Jensen.</wd>

<space/>

<wd l="7834" t="8659" r="8266" b="8803">2013.</wd>

<space/>

<wd l="8371" t="8659" r="9365" b="8846">Recognising</wd>

<space/>

<wd l="9432" t="8659" r="9715" b="8803">and</wd>

<space/>

<wd l="9773" t="8659" r="10507" b="8842">interpret-</wd>

</ln>

<ln l="6346" t="8880" r="10507" b="9067" baseLine="9014">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6346" t="8880" r="6600" b="9067">ing</wd>

<space/>

<wd l="6662" t="8880" r="7195" b="9024">named</wd>

<space/>

<wd l="7258" t="8880" r="7963" b="9062">temporal</wd>

<space/>

<wd l="8030" t="8880" r="8990" b="9062">expressions.</wd>

<space/>

<wd l="9110" t="8885" r="9278" b="9019">In</wd>

<space/>

</run>

<wd l="9336" t="8885" r="9994" b="9048"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">RANLP</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="10061" t="8923" r="10507" b="9067">pages</wd>

<space/>

</run>

</ln>

<ln l="6365" t="9101" r="7085" b="9245" baseLine="9235" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6365" t="9101" r="7085" b="9245">113–121.</wd>

</ln>

</para>

<para l="6154" t="9480" r="10517" b="10104" alignment="justified" li="216" spaceBefore="159" fli="-216" lsp="exactly" lspExact="221" language="en">

<ln l="6154" t="9480" r="10507" b="9667" baseLine="9614" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6154" t="9480" r="6322" b="9624">C.</wd>

<space/>

<wd l="6389" t="9480" r="6936" b="9667">Cherry</wd>

<space/>

<wd l="6994" t="9480" r="7282" b="9624">and</wd>

<space/>

<wd l="7334" t="9485" r="7512" b="9624">H.</wd>

<space/>

<wd l="7584" t="9480" r="7958" b="9624">Guo.</wd>

<space/>

<wd l="8059" t="9480" r="8491" b="9624">2015.</wd>

<space/>

<wd l="8587" t="9480" r="8894" b="9624">The</wd>

<space/>

<wd l="8952" t="9480" r="9998" b="9624">unreasonable</wd>

<space/>

<wd l="10061" t="9480" r="10507" b="9624">effec-</wd>

</ln>

<ln l="6346" t="9701" r="10517" b="9883" baseLine="9835" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6346" t="9701" r="6974" b="9845">tiveness</wd>

<space/>

<wd l="7056" t="9701" r="7229" b="9845">of</wd>

<space/>

<wd l="7286" t="9701" r="7694" b="9845">word</wd>

<space/>

<wd l="7766" t="9701" r="8966" b="9883">representations</wd>

<space/>

<wd l="9038" t="9701" r="9274" b="9845">for</wd>

<space/>

<wd l="9341" t="9701" r="9912" b="9845">Twitter</wd>

<space/>

<wd l="9984" t="9701" r="10517" b="9845">named</wd>

<space/>

</ln>

<ln l="6350" t="9917" r="9221" b="10104" baseLine="10056">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6350" t="9917" r="6797" b="10104">entity</wd>

<space/>

<wd l="6850" t="9917" r="7795" b="10104">recognition.</wd>

<space/>

<wd l="7882" t="9922" r="8050" b="10056">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="8098" t="9922" r="8506" b="10061">Proc.</wd>

<space/>

</run>

<wd l="8568" t="9922" r="9221" b="10061"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">NAACL</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

<para l="6149" t="10301" r="10526" b="11362" alignment="justified" li="216" spaceBefore="162" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="6149" t="10301" r="10507" b="10488" baseLine="10435" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="10306" r="6322" b="10445">B.</wd>

<space/>

<wd l="6394" t="10301" r="6571" b="10445">O.</wd>

<space/>

<wd l="6643" t="10301" r="7272" b="10469">Connor,</wd>

<space/>

<wd l="7339" t="10306" r="7555" b="10445">M.</wd>

<space/>

<wd l="7632" t="10301" r="8266" b="10488">Krieger,</wd>

<space/>

<wd l="8338" t="10301" r="8621" b="10445">and</wd>

<space/>

<wd l="8678" t="10306" r="8861" b="10445">D.</wd>

<space/>

<wd l="8928" t="10301" r="9312" b="10445">Ahn.</wd>

<space/>

<wd l="9422" t="10301" r="9854" b="10445">2010.</wd>

<space/>

<wd l="9965" t="10306" r="10507" b="10445">Tweet-</wd>

</ln>

<ln l="6346" t="10517" r="10507" b="10704" baseLine="10656" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6346" t="10517" r="6840" b="10661">Motif:</wd>

<space/>

<wd l="6984" t="10517" r="7930" b="10704">Exploratory</wd>

<space/>

<wd l="8021" t="10517" r="8558" b="10661">Search</wd>

<space/>

<wd l="8645" t="10517" r="8928" b="10661">and</wd>

<space/>

<wd l="9010" t="10517" r="9456" b="10699">Topic</wd>

<space/>

<wd l="9542" t="10517" r="10507" b="10661">Summariza-</wd>

</ln>

<ln l="6346" t="10738" r="10526" b="10920" baseLine="10872">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6346" t="10738" r="6658" b="10882">tion</wd>

<space/>

<wd l="6715" t="10738" r="6950" b="10882">for</wd>

<space/>

<wd l="7003" t="10738" r="7603" b="10882">Twitter.</wd>

<space/>

<wd l="7709" t="10742" r="7877" b="10877">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="7934" t="10738" r="8918" b="10920">Proceedings</wd>

<space/>

<wd l="8986" t="10738" r="9163" b="10920">of</wd>

<space/>

<wd l="9197" t="10738" r="9432" b="10882">the</wd>

<space/>

<wd l="9494" t="10738" r="10022" b="10882">Fourth</wd>

<space/>

<wd l="10066" t="10742" r="10526" b="10877">AAAI</wd>

<space/>

</run>

</ln>

<ln l="6355" t="10954" r="10507" b="11136" baseLine="11093">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6355" t="10954" r="7258" b="11136">Conference</wd>

<space/>

<wd l="7310" t="11002" r="7498" b="11098">on</wd>

<space/>

<wd l="7560" t="10954" r="8213" b="11136">Weblogs</wd>

<space/>

<wd l="8261" t="10954" r="8563" b="11098">and</wd>

<space/>

<wd l="8602" t="10954" r="9106" b="11098">Social</wd>

<space/>

<wd l="9139" t="10954" r="9653" b="11098">Media</wd>

<space/>

</run>

<wd l="9706" t="10958" r="10507" b="11131"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">(ICWSM)</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

</ln>

<ln l="6346" t="11174" r="7589" b="11362" baseLine="11314" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6346" t="11218" r="6797" b="11362">pages</wd>

<space/>

<wd l="6854" t="11174" r="7589" b="11318">384–385.</wd>

</ln>

</para>

<para l="6149" t="11554" r="10522" b="12619" alignment="justified" li="216" spaceBefore="163" fli="-216" lsp="exactly" lspExact="218" language="en">

<ln l="6149" t="11554" r="10517" b="11741" baseLine="11693" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6149" t="11558" r="6331" b="11698">H.</wd>

<space/>

<wd l="6466" t="11554" r="7531" b="11741">Cunningham,</wd>

<space/>

<wd l="7675" t="11558" r="7858" b="11698">D.</wd>

<space/>

<wd l="7982" t="11554" r="8746" b="11741">Maynard,</wd>

<space/>

<wd l="8894" t="11558" r="9072" b="11698">K.</wd>

<space/>

<wd l="9202" t="11554" r="10085" b="11722">Bontcheva,</wd>

<space/>

<wd l="10229" t="11554" r="10517" b="11698">and</wd>

<space/>

</ln>

<ln l="6346" t="11774" r="10507" b="11918" baseLine="11909" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6346" t="11779" r="6504" b="11918">V.</wd>

<space/>

<wd l="6571" t="11774" r="7147" b="11918">Tablan.</wd>

<space/>

<wd l="7248" t="11774" r="7680" b="11918">2002.</wd>

<space/>

<wd l="7786" t="11774" r="8328" b="11918">GATE:</wd>

<space/>

<wd l="8405" t="11818" r="8582" b="11918">an</wd>

<space/>

<wd l="8640" t="11774" r="9634" b="11918">Architecture</wd>

<space/>

<wd l="9691" t="11774" r="9926" b="11918">for</wd>

<space/>

<wd l="9979" t="11774" r="10507" b="11918">Devel-</wd>

</ln>

<ln l="6350" t="11995" r="10507" b="12178" baseLine="12130">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6350" t="12014" r="6946" b="12178">opment</wd>

<space/>

<wd l="7022" t="11995" r="7195" b="12139">of</wd>

<space/>

<wd l="7258" t="11995" r="7824" b="12139">Robust</wd>

<space/>

<wd l="7891" t="12000" r="8261" b="12134">HLT</wd>

<space/>

<wd l="8338" t="11995" r="9394" b="12178">Applications.</wd>

<space/>

<wd l="9552" t="12000" r="9720" b="12134">In</wd>

<space/>

</run>

<wd l="9792" t="11995" r="10507" b="12139" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Proceed-</wd>

</ln>

<ln l="6355" t="12211" r="10522" b="12394" baseLine="12350" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6355" t="12216" r="6677" b="12394">ings</wd>

<space/>

<wd l="6744" t="12211" r="6926" b="12394">of</wd>

<space/>

<wd l="6960" t="12211" r="7200" b="12355">the</wd>

<space/>

<wd l="7262" t="12211" r="7613" b="12355">40th</wd>

<space/>

<wd l="7666" t="12211" r="8256" b="12355">Annual</wd>

<space/>

<wd l="8309" t="12216" r="8966" b="12394">Meeting</wd>

<space/>

<wd l="9034" t="12259" r="9226" b="12355">on</wd>

<space/>

<wd l="9274" t="12216" r="10214" b="12355">Association</wd>

<space/>

<wd l="10248" t="12211" r="10522" b="12394">for</wd>

<space/>

</ln>

<ln l="6355" t="12432" r="9806" b="12619" baseLine="12566">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6355" t="12432" r="7546" b="12614">Computational</wd>

<space/>

</run>

<wd l="7589" t="12437" r="8510" b="12614"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Linguistics</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="8568" t="12475" r="9014" b="12619">pages</wd>

<space/>

<wd l="9091" t="12432" r="9806" b="12576">168–175.</wd>

</run>

</ln>

</para>

<para l="6149" t="12811" r="10517" b="14098" alignment="justified" li="216" spaceBefore="165" fli="-216" lsp="exactly" lspExact="219" language="en">

<ln l="6149" t="12811" r="10507" b="12998" baseLine="12950" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6149" t="12816" r="6307" b="12955">L.</wd>

<space/>

<wd l="6413" t="12811" r="7320" b="12998">Derczynski</wd>

<space/>

<wd l="7421" t="12811" r="7704" b="12955">and</wd>

<space/>

<wd l="7805" t="12816" r="7982" b="12955">K.</wd>

<space/>

<wd l="8088" t="12811" r="8971" b="12955">Bontcheva.</wd>

<space/>

<wd l="9202" t="12811" r="9634" b="12955">2014.</wd>

<space/>

<wd l="9859" t="12811" r="10507" b="12955">Passive-</wd>

</ln>

<ln l="6350" t="13032" r="10512" b="13219" baseLine="13166" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6350" t="13032" r="7181" b="13219">aggressive</wd>

<space/>

<wd l="7310" t="13075" r="8026" b="13214">sequence</wd>

<space/>

<wd l="8150" t="13032" r="8789" b="13219">labeling</wd>

<space/>

<wd l="8909" t="13032" r="9264" b="13176">with</wd>

<space/>

<wd l="9389" t="13032" r="10512" b="13176">discriminative</wd>

<space/>

</ln>

<ln l="6346" t="13253" r="10502" b="13440" baseLine="13387" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6346" t="13253" r="7296" b="13440">post-editing</wd>

<space/>

<wd l="7339" t="13253" r="7570" b="13397">for</wd>

<space/>

<wd l="7608" t="13253" r="8534" b="13440">recognising</wd>

<space/>

<wd l="8578" t="13296" r="9110" b="13435">person</wd>

<space/>

<wd l="9154" t="13253" r="9715" b="13397">entities</wd>

<space/>

<wd l="9763" t="13253" r="9917" b="13392">in</wd>

<space/>

<wd l="9955" t="13272" r="10502" b="13397">tweets.</wd>

<space/>

</ln>

<ln l="6346" t="13469" r="10507" b="13651" baseLine="13608">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6346" t="13474" r="6514" b="13608">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="6576" t="13469" r="7560" b="13651">Proceedings</wd>

<space/>

<wd l="7627" t="13469" r="7810" b="13651">of</wd>

<space/>

<wd l="7848" t="13469" r="8083" b="13613">the</wd>

<space/>

<wd l="8155" t="13469" r="8496" b="13613">14th</wd>

<space/>

<wd l="8573" t="13469" r="9470" b="13651">Conference</wd>

<space/>

<wd l="9542" t="13469" r="9720" b="13651">of</wd>

<space/>

<wd l="9758" t="13469" r="9994" b="13613">the</wd>

<space/>

<wd l="10051" t="13474" r="10507" b="13613">Euro-</wd>

</run>

</ln>

<ln l="6326" t="13690" r="10517" b="13872" baseLine="13824" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="6326" t="13738" r="6730" b="13872">pean</wd>

<space/>

<wd l="6806" t="13690" r="7459" b="13872">Chapter</wd>

<space/>

<wd l="7517" t="13690" r="7699" b="13872">of</wd>

<space/>

<wd l="7733" t="13690" r="7968" b="13834">the</wd>

<space/>

<wd l="8016" t="13694" r="8957" b="13834">Association</wd>

<space/>

<wd l="8990" t="13690" r="9264" b="13872">for</wd>

<space/>

<wd l="9326" t="13690" r="10517" b="13872">Computational</wd>

<space/>

</ln>

<ln l="6341" t="13910" r="9206" b="14098" baseLine="14045">

<wd l="6341" t="13915" r="7267" b="14093"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">Linguistics</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

<wd l="7320" t="13910" r="7915" b="14054">volume</wd>

<space/>

<wd l="7968" t="13910" r="8107" b="14078">2,</wd>

<space/>

<wd l="8165" t="13954" r="8611" b="14098">pages</wd>

<space/>

<wd l="8674" t="13910" r="9206" b="14054">69–73.</wd>

</run>

</ln>

</para>

<para l="6149" t="14290" r="10517" b="14477" alignment="justified" spaceBefore="145" lsp="exactly" lspExact="233" language="en">

<ln l="6149" t="14290" r="10517" b="14477" baseLine="14424" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="17">

<wd l="6149" t="14294" r="6307" b="14434">L.</wd>

<space/>

<wd l="6494" t="14290" r="7445" b="14477">Derczynski,</wd>

<space/>

<wd l="7656" t="14294" r="7838" b="14434">D.</wd>

<space/>

<wd l="8026" t="14290" r="8784" b="14477">Maynard,</wd>

<space/>

<wd l="8995" t="14294" r="9178" b="14434">N.</wd>

<space/>

<wd l="9365" t="14290" r="10013" b="14458">Aswani,</wd>

<space/>

<wd l="10229" t="14290" r="10517" b="14434">and</wd>

</ln>

</para>

<para l="6346" t="14510" r="10522" b="15312" alignment="justified" li="216" spaceBefore="3" lsp="exactly" lspExact="217" language="en">

<ln l="6350" t="14510" r="10517" b="14698" baseLine="14645" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="14515" r="6528" b="14654">K.</wd>

<space/>

<wd l="6600" t="14510" r="7483" b="14654">Bontcheva.</wd>

<space/>

<wd l="7608" t="14510" r="8126" b="14654">2013a.</wd>

<space/>

<wd l="8246" t="14510" r="9638" b="14698">Microblog-Genre</wd>

<space/>

<wd l="9701" t="14510" r="10162" b="14654">Noise</wd>

<space/>

<wd l="10229" t="14510" r="10517" b="14654">and</wd>

<space/>

</ln>

<ln l="6346" t="14726" r="10507" b="14914" baseLine="14866">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6346" t="14731" r="6902" b="14909">Impact</wd>

<space/>

<wd l="6974" t="14770" r="7166" b="14870">on</wd>

<space/>

<wd l="7238" t="14726" r="7973" b="14870">Semantic</wd>

<space/>

<wd l="8040" t="14726" r="8938" b="14870">Annotation</wd>

<space/>

<wd l="9005" t="14731" r="9787" b="14914">Accuracy.</wd>

<space/>

<wd l="9926" t="14731" r="10094" b="14866">In</wd>

<space/>

</run>

<wd l="10157" t="14731" r="10507" b="14870" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Pro-</wd>

</ln>

<ln l="6350" t="14947" r="10522" b="15130" baseLine="15082" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="6350" t="14947" r="7046" b="15130">ceedings</wd>

<space/>

<wd l="7109" t="14947" r="7291" b="15130">of</wd>

<space/>

<wd l="7320" t="14947" r="7560" b="15091">the</wd>

<space/>

<wd l="7618" t="14947" r="7968" b="15091">24th</wd>

<space/>

<wd l="8016" t="14952" r="8458" b="15091">ACM</wd>

<space/>

<wd l="8515" t="14947" r="9413" b="15130">Conference</wd>

<space/>

<wd l="9480" t="14995" r="9672" b="15091">on</wd>

<space/>

<wd l="9730" t="14952" r="10522" b="15130">Hypertext</wd>

<space/>

</ln>

<ln l="6346" t="15168" r="8338" b="15312" baseLine="15302">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="6346" t="15168" r="6653" b="15312">and</wd>

<space/>

<wd l="6696" t="15168" r="7195" b="15312">Social</wd>

<space/>

</run>

<wd l="7238" t="15168" r="7790" b="15312"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Media</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="7853" t="15168" r="8338" b="15312">ACM.</wd>

</run>

</ln>

</para>

</column>

</section>

<dd l="5771" t="15736" r="6181" b="15977">

<para l="5804" t="15782" r="6148" b="15946" alignment="left" spaceBefore="4" lsp="exactly" lspExact="229" language="en">

<ln l="5870" t="15782" r="6082" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="25">

<wd l="5870" t="15787" r="6082" b="15946">52</wd>

</ln>

</para>

</dd>

</body>

</page>

<page ocr-vers="OmniPageCSDK18" app-vers="OmniPageCSDK18">

<description>

<source file="C://Users//wing.nus//Documents//pdf\W15-4306.pdf.pdf" dpix="300" dpiy="300" sizex="2481" sizey="3508"/>

<theoreticalPage size="A4" marginLeft="1445" marginTop="1320" marginRight="6044" marginBottom="858" offsetX="10" offsetY="16" width="11918" height="16854"/>

<language>en</language>

</description>

<body>

<section l="1445" t="1320" r="5865" b="15980">

<column l="1445" t="1320" r="5865" b="15980">

<para l="1445" t="1363" r="5808" b="2602" alignment="justified" li="144" spaceBefore="4" fli="-144" lsp="exactly" lspExact="220" language="en">

<ln l="1445" t="1363" r="5798" b="1550" baseLine="1498" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1445" t="1368" r="1603" b="1507">L.</wd>

<space/>

<wd l="1680" t="1363" r="2630" b="1550">Derczynski,</wd>

<space/>

<wd l="2702" t="1368" r="2885" b="1507">A.</wd>

<space/>

<wd l="2962" t="1363" r="3451" b="1531">Ritter,</wd>

<space/>

<wd l="3528" t="1363" r="3672" b="1507">S.</wd>

<space/>

<wd l="3754" t="1363" r="4234" b="1531">Clark,</wd>

<space/>

<wd l="4310" t="1363" r="4598" b="1507">and</wd>

<space/>

<wd l="4666" t="1368" r="4843" b="1507">K.</wd>

<space/>

<wd l="4915" t="1363" r="5798" b="1507">Bontcheva.</wd>

<space/>

</ln>

<ln l="1646" t="1579" r="5794" b="1766" baseLine="1718" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1646" t="1579" r="2170" b="1723">2013b.</wd>

<space/>

<wd l="2434" t="1579" r="3005" b="1723">Twitter</wd>

<space/>

<wd l="3110" t="1579" r="4301" b="1762">Part-of-Speech</wd>

<space/>

<wd l="4406" t="1579" r="5050" b="1766">Tagging</wd>

<space/>

<wd l="5160" t="1579" r="5395" b="1723">for</wd>

<space/>

<wd l="5496" t="1579" r="5794" b="1723">All:</wd>

<space/>

</ln>

<ln l="1646" t="1800" r="5808" b="1987" baseLine="1934">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="1646" t="1800" r="2630" b="1987">Overcoming</wd>

<space/>

<wd l="2683" t="1800" r="3211" b="1982">Sparse</wd>

<space/>

<wd l="3264" t="1800" r="3547" b="1944">and</wd>

<space/>

<wd l="3590" t="1800" r="4061" b="1987">Noisy</wd>

<space/>

<wd l="4114" t="1805" r="4526" b="1944">Data.</wd>

<space/>

<wd l="4608" t="1805" r="4776" b="1939">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><wd l="4819" t="1800" r="5808" b="1982">Proceedings</wd>

<space/>

</run>

</ln>

<ln l="1646" t="2021" r="5803" b="2203" baseLine="2155" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1646" t="2021" r="1829" b="2203">of</wd>

<space/>

<wd l="1838" t="2026" r="2390" b="2165">Recent</wd>

<space/>

<wd l="2414" t="2021" r="3192" b="2165">Advances</wd>

<space/>

<wd l="3245" t="2026" r="3384" b="2165">in</wd>

<space/>

<wd l="3427" t="2021" r="4051" b="2165">Natural</wd>

<space/>

<wd l="4090" t="2026" r="4886" b="2203">Language</wd>

<space/>

<wd l="4930" t="2026" r="5803" b="2203">Processing</wd>

<space/>

</ln>

<ln l="1646" t="2237" r="5798" b="2424" baseLine="2376">

<wd l="1646" t="2242" r="2424" b="2414"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">(RANLP)</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1"><space/>

<wd l="2520" t="2237" r="3461" b="2381">Association</wd>

<space/>

<wd l="3547" t="2237" r="3778" b="2381">for</wd>

<space/>

<wd l="3869" t="2237" r="5045" b="2419">Computational</wd>

<space/>

<wd l="5131" t="2237" r="5798" b="2424">Linguis-</wd>

</run>

</ln>

<ln l="1642" t="2458" r="1958" b="2602" baseLine="2592" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="1">

<wd l="1642" t="2458" r="1958" b="2602">tics.</wd>

</ln>

</para>

<para l="1445" t="2837" r="5798" b="3456" alignment="justified" li="144" spaceBefore="162" fli="-144" lsp="exactly" lspExact="220" language="en">

<ln l="1445" t="2837" r="5798" b="3024" baseLine="2971" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="2842" r="1603" b="2981">L.</wd>

<space/>

<wd l="1661" t="2837" r="2611" b="3024">Derczynski,</wd>

<space/>

<wd l="2674" t="2837" r="2818" b="2981">S.</wd>

<space/>

<wd l="2880" t="2837" r="3518" b="3005">Chester,</wd>

<space/>

<wd l="3576" t="2837" r="3864" b="2981">and</wd>

<space/>

<wd l="3912" t="2842" r="4090" b="2981">K.</wd>

<space/>

<wd l="4157" t="2837" r="4301" b="2981">S.</wd>

<space/>

<wd l="4358" t="2837" r="4829" b="3024">Bøgh.</wd>

<space/>

<wd l="4915" t="2837" r="5438" b="2981">2015a.</wd>

<space/>

<wd l="5520" t="2842" r="5798" b="2976">Ex-</wd>

</ln>

<ln l="1642" t="3058" r="5798" b="3245" baseLine="3192">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1642" t="3058" r="2136" b="3202">trinsic</wd>

<space/>

<wd l="2194" t="3058" r="2736" b="3240">impact</wd>

<space/>

<wd l="2794" t="3058" r="2966" b="3202">of</wd>

<space/>

<wd l="3010" t="3058" r="3514" b="3245">tuning</wd>

<space/>

<wd l="3571" t="3062" r="4109" b="3202">Brown</wd>

<space/>

<wd l="4166" t="3058" r="4987" b="3245">clustering.</wd>

<space/>

<wd l="5078" t="3062" r="5246" b="3197">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="5309" t="3062" r="5486" b="3202">To</wd>

<space/>

<wd l="5544" t="3106" r="5798" b="3240">ap-</wd>

</run>

</ln>

<ln l="1622" t="3278" r="2045" b="3456" baseLine="3413">

<wd l="1622" t="3322" r="2045" b="3456"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">pear</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

<para l="1445" t="3653" r="5808" b="4675" alignment="justified" li="288" spaceBefore="152" fli="-288" lsp="exactly" lspExact="220" language="en">

<bullet type="ordered" numChars="3">

</bullet>

<ln l="1445" t="3653" r="5803" b="3840" baseLine="3792" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1445" t="3658" r="1699" b="3792">L.</wd>

<space/>

<wd l="1699" t="3653" r="2650" b="3840">Derczynski,</wd>

<space/>

<wd l="2755" t="3658" r="2938" b="3797">D.</wd>

<space/>

<wd l="3034" t="3653" r="3797" b="3840">Maynard,</wd>

<space/>

<wd l="3902" t="3653" r="4080" b="3797">G.</wd>

<space/>

<wd l="4176" t="3653" r="4685" b="3821">Rizzo,</wd>

<space/>

<wd l="4790" t="3658" r="5002" b="3797">M.</wd>

<space/>

<wd l="5102" t="3696" r="5381" b="3797">van</wd>

<space/>

<wd l="5472" t="3658" r="5803" b="3835">Erp,</wd>

<space/>

</ln>

<ln l="1646" t="3874" r="5798" b="4061" baseLine="4008" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1646" t="3874" r="1824" b="4018">G.</wd>

<space/>

<wd l="1920" t="3874" r="2534" b="4042">Gorrell,</wd>

<space/>

<wd l="2626" t="3878" r="2798" b="4018">R.</wd>

<space/>

<wd l="2885" t="3878" r="3480" b="4061">Troncy,</wd>

<space/>

<wd l="3581" t="3874" r="3864" b="4018">and</wd>

<space/>

<wd l="3946" t="3878" r="4123" b="4018">K.</wd>

<space/>

<wd l="4214" t="3874" r="5093" b="4018">Bontcheva.</wd>

<space/>

<wd l="5275" t="3874" r="5798" b="4018">2015b.</wd>

<space/>

</ln>

<ln l="1642" t="4090" r="5808" b="4277" baseLine="4229" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1642" t="4090" r="2333" b="4277">Analysis</wd>

<space/>

<wd l="2390" t="4090" r="2563" b="4234">of</wd>

<space/>

<wd l="2597" t="4090" r="3130" b="4234">named</wd>

<space/>

<wd l="3182" t="4090" r="3629" b="4277">entity</wd>

<space/>

<wd l="3677" t="4090" r="4584" b="4277">recognition</wd>

<space/>

<wd l="4637" t="4090" r="4920" b="4234">and</wd>

<space/>

<wd l="4963" t="4090" r="5525" b="4277">linking</wd>

<space/>

<wd l="5578" t="4090" r="5808" b="4234">for</wd>

<space/>

</ln>

<ln l="1642" t="4310" r="5803" b="4493" baseLine="4450">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="1642" t="4330" r="2189" b="4454">tweets.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><wd l="2357" t="4310" r="3312" b="4493">Information</wd>

<space/>

<wd l="3394" t="4315" r="4262" b="4493">Processing</wd>

<space/>

<wd l="4344" t="4310" r="4651" b="4454">and</wd>

<space/>

</run>

<wd l="4718" t="4315" r="5803" b="4493"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">Management</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2"><space/>

</run>

</ln>

<ln l="1646" t="4531" r="2434" b="4675" baseLine="4666" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-2">

<wd l="1646" t="4531" r="2434" b="4675">51:32–49.</wd>

</ln>

</para>

<para l="1445" t="4910" r="5808" b="5491" alignment="justified" li="144" spaceBefore="156" fli="-144" lsp="exactly" lspExact="220" language="en">

<ln l="1445" t="4910" r="5808" b="5098" baseLine="5045" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="4915" r="1560" b="5054">J.</wd>

<space/>

<wd l="1618" t="4910" r="2477" b="5054">Eisenstein.</wd>

<space/>

<wd l="2563" t="4910" r="2995" b="5054">2013.</wd>

<space/>

<wd l="3082" t="4910" r="3514" b="5054">What</wd>

<space/>

<wd l="3562" t="4930" r="3710" b="5054">to</wd>

<space/>

<wd l="3768" t="4910" r="3960" b="5054">do</wd>

<space/>

<wd l="4018" t="4910" r="4459" b="5054">about</wd>

<space/>

<wd l="4502" t="4910" r="4795" b="5054">bad</wd>

<space/>

<wd l="4838" t="4910" r="5558" b="5098">language</wd>

<space/>

<wd l="5616" t="4954" r="5808" b="5054">on</wd>

<space/>

</ln>

<ln l="1642" t="5126" r="5803" b="5314" baseLine="5266">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1642" t="5126" r="1882" b="5270">the</wd>

<space/>

<wd l="1949" t="5126" r="2592" b="5270">internet.</wd>

<space/>

<wd l="2717" t="5131" r="2880" b="5266">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2942" t="5126" r="3926" b="5309">Proceedings</wd>

<space/>

<wd l="3994" t="5126" r="4176" b="5309">of</wd>

<space/>

</run>

<wd l="4200" t="5131" r="5285" b="5294"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">NAACL-HLT</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="5357" t="5170" r="5803" b="5314">pages</wd>

<space/>

</run>

</ln>

<ln l="1646" t="5347" r="2376" b="5491" baseLine="5482" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="5347" r="2376" b="5491">359–369.</wd>

</ln>

</para>

<para l="1445" t="5726" r="5808" b="6307" alignment="justified" li="144" spaceBefore="161" fli="-144" lsp="exactly" lspExact="220" language="en">

<ln l="1445" t="5726" r="5798" b="5914" baseLine="5861" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="5731" r="1627" b="5870">H.</wd>

<space/>

<wd l="1675" t="5726" r="2549" b="5894">Fromreide,</wd>

<space/>

<wd l="2597" t="5731" r="2779" b="5870">D.</wd>

<space/>

<wd l="2832" t="5731" r="3298" b="5914">Hovy,</wd>

<space/>

<wd l="3350" t="5726" r="3638" b="5870">and</wd>

<space/>

<wd l="3677" t="5731" r="3854" b="5870">A.</wd>

<space/>

<wd l="3912" t="5726" r="4598" b="5914">Søgaard.</wd>

<space/>

<wd l="4690" t="5726" r="5122" b="5870">2014.</wd>

<space/>

<wd l="5208" t="5726" r="5798" b="5870">Crowd-</wd>

</ln>

<ln l="1651" t="5947" r="5808" b="6134" baseLine="6082" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="5947" r="2323" b="6134">sourcing</wd>

<space/>

<wd l="2395" t="5947" r="2678" b="6091">and</wd>

<space/>

<wd l="2746" t="5947" r="3581" b="6134">annotating</wd>

<space/>

<wd l="3648" t="5952" r="4046" b="6091">NER</wd>

<space/>

<wd l="4109" t="5947" r="4339" b="6091">for</wd>

<space/>

<wd l="4402" t="5947" r="4973" b="6091">Twitter</wd>

<space/>

<wd l="5035" t="5947" r="5515" b="6091">#drift.</wd>

<space/>

<wd l="5640" t="5952" r="5808" b="6086">In</wd>

<space/>

</ln>

<ln l="1642" t="6168" r="2645" b="6307" baseLine="6302">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1642" t="6168" r="2050" b="6307">Proc.</wd>

<space/>

</run>

<wd l="2117" t="6168" r="2645" b="6307"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">LREC</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

<para l="1445" t="6542" r="5808" b="7166" alignment="justified" li="144" spaceBefore="156" fli="-144" lsp="exactly" lspExact="220" language="en">

<ln l="1445" t="6542" r="5808" b="6730" baseLine="6677" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="6547" r="1627" b="6686">A.</wd>

<space/>

<wd l="1704" t="6542" r="2573" b="6710">Johannsen,</wd>

<space/>

<wd l="2650" t="6547" r="2832" b="6686">D.</wd>

<space/>

<wd l="2909" t="6547" r="3379" b="6730">Hovy,</wd>

<space/>

<wd l="3456" t="6547" r="3638" b="6686">H.</wd>

<space/>

<wd l="3715" t="6547" r="3931" b="6686">M.</wd>

<space/>

<wd l="4008" t="6542" r="4622" b="6710">Alonso,</wd>

<space/>

<wd l="4699" t="6547" r="4872" b="6686">B.</wd>

<space/>

<wd l="4949" t="6542" r="5443" b="6710">Plank,</wd>

<space/>

<wd l="5525" t="6542" r="5808" b="6686">and</wd>

<space/>

</ln>

<ln l="1642" t="6763" r="5798" b="6950" baseLine="6898" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="6768" r="1824" b="6907">A.</wd>

<space/>

<wd l="1906" t="6763" r="2592" b="6950">Søgaard.</wd>

<space/>

<wd l="2731" t="6763" r="3163" b="6907">2014.</wd>

<space/>

<wd l="3302" t="6768" r="3730" b="6907">More</wd>

<space/>

<wd l="3802" t="6806" r="3965" b="6907">or</wd>

<space/>

<wd l="4032" t="6763" r="4325" b="6907">less</wd>

<space/>

<wd l="4406" t="6763" r="5251" b="6946">supervised</wd>

<space/>

<wd l="5323" t="6806" r="5798" b="6946">super-</wd>

</ln>

<ln l="1651" t="6979" r="5621" b="7166" baseLine="7118">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1651" t="7022" r="2074" b="7123">sense</wd>

<space/>

<wd l="2122" t="6979" r="2717" b="7166">tagging</wd>

<space/>

<wd l="2774" t="6979" r="2947" b="7123">of</wd>

<space/>

<wd l="2986" t="6979" r="3533" b="7123">twitter.</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3619" t="6984" r="4027" b="7123">Proc.</wd>

<space/>

</run>

<wd l="4118" t="6984" r="4627" b="7147"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">*SEM</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="4680" t="7022" r="5131" b="7166">pages</wd>

<space/>

<wd l="5203" t="6979" r="5621" b="7123">1–11.</wd>

</run>

</ln>

</para>

<para l="1445" t="7358" r="5808" b="8160" alignment="justified" li="144" spaceBefore="156" fli="-144" lsp="exactly" lspExact="220" language="en">

<ln l="1445" t="7358" r="5808" b="7546" baseLine="7498" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="7363" r="1627" b="7502">D.</wd>

<space/>

<wd l="1718" t="7358" r="2203" b="7546">Kergl,</wd>

<space/>

<wd l="2290" t="7363" r="2462" b="7502">R.</wd>

<space/>

<wd l="2549" t="7358" r="3216" b="7526">Roedler,</wd>

<space/>

<wd l="3307" t="7358" r="3595" b="7502">and</wd>

<space/>

<wd l="3672" t="7358" r="3816" b="7502">S.</wd>

<space/>

<wd l="3907" t="7358" r="4474" b="7502">Seeber.</wd>

<space/>

<wd l="4646" t="7358" r="5078" b="7502">2014.</wd>

<space/>

<wd l="5251" t="7358" r="5491" b="7502">On</wd>

<space/>

<wd l="5563" t="7358" r="5808" b="7502">the</wd>

<space/>

</ln>

<ln l="1646" t="7579" r="5808" b="7766" baseLine="7714" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="7579" r="2611" b="7766">endogenesis</wd>

<space/>

<wd l="2712" t="7579" r="2885" b="7723">of</wd>

<space/>

<wd l="2962" t="7579" r="3662" b="7723">Twitter’s</wd>

<space/>

<wd l="3763" t="7579" r="4387" b="7762">Spritzer</wd>

<space/>

<wd l="4483" t="7579" r="4766" b="7723">and</wd>

<space/>

<wd l="4862" t="7579" r="5808" b="7723">Gardenhose</wd>

<space/>

</ln>

<ln l="1651" t="7800" r="5798" b="7987" baseLine="7934">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1651" t="7800" r="2203" b="7982">sample</wd>

<space/>

<wd l="2266" t="7819" r="2904" b="7944">streams.</wd>

<space/>

<wd l="2986" t="7805" r="3154" b="7939">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="3202" t="7805" r="3610" b="7944">Proc.</wd>

<space/>

</run>

<wd l="3662" t="7805" r="4502" b="7968"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">ASONAM</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="4560" t="7843" r="5006" b="7987">pages</wd>

<space/>

<wd l="5064" t="7800" r="5798" b="7944">357–364.</wd>

<space/>

</run>

</ln>

<ln l="1642" t="8021" r="2112" b="8160" baseLine="8155" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="8021" r="2112" b="8160">IEEE.</wd>

</ln>

</para>

<para l="1445" t="8395" r="5808" b="9019" alignment="justified" li="144" spaceBefore="156" fli="-144" lsp="exactly" lspExact="220" language="en">

<ln l="1445" t="8395" r="5808" b="8578" baseLine="8530" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="8400" r="1589" b="8539">T.</wd>

<space/>

<wd l="1680" t="8400" r="2054" b="8563">Koo,</wd>

<space/>

<wd l="2141" t="8400" r="2323" b="8539">X.</wd>

<space/>

<wd l="2410" t="8395" r="3125" b="8563">Carreras,</wd>

<space/>

<wd l="3216" t="8395" r="3499" b="8539">and</wd>

<space/>

<wd l="3571" t="8400" r="3787" b="8539">M.</wd>

<space/>

<wd l="3878" t="8395" r="4488" b="8539">Collins.</wd>

<space/>

<wd l="4651" t="8395" r="5083" b="8539">2008.</wd>

<space/>

<wd l="5246" t="8395" r="5808" b="8578">Simple</wd>

<space/>

</ln>

<ln l="1651" t="8616" r="5803" b="8803" baseLine="8750">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1651" t="8616" r="2938" b="8798">semi-supervised</wd>

<space/>

<wd l="2995" t="8616" r="3931" b="8803">dependency</wd>

<space/>

<wd l="3989" t="8616" r="4613" b="8803">parsing.</wd>

<space/>

<wd l="4704" t="8621" r="4867" b="8755">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4920" t="8621" r="5328" b="8760">Proc.</wd>

<space/>

</run>

<wd l="5386" t="8621" r="5803" b="8784"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">ACL</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

</run>

</ln>

<ln l="1642" t="8832" r="2405" b="9019" baseLine="8971" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="8875" r="2016" b="9019">page</wd>

<space/>

<wd l="2074" t="8832" r="2405" b="8976">595.</wd>

</ln>

</para>

<para l="1445" t="9211" r="5842" b="10051" alignment="justified" li="144" spaceBefore="158" fli="-144" lsp="exactly" lspExact="220" language="en">

<bullet type="ordered" numChars="3">

</bullet>

<ln l="1445" t="9211" r="5798" b="9398" baseLine="9350" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="9216" r="1728" b="9350">M.</wd>

<space/>

<wd l="1728" t="9211" r="2006" b="9355">Lui</wd>

<space/>

<wd l="2074" t="9211" r="2357" b="9355">and</wd>

<space/>

<wd l="2414" t="9216" r="2558" b="9355">T.</wd>

<space/>

<wd l="2630" t="9211" r="3346" b="9355">Baldwin.</wd>

<space/>

<wd l="3470" t="9211" r="3902" b="9355">2012.</wd>

<space/>

<wd l="4018" t="9211" r="4800" b="9398">langid.py:</wd>

<space/>

<wd l="4901" t="9216" r="5146" b="9350">An</wd>

<space/>

<wd l="5208" t="9211" r="5798" b="9355">off-the-</wd>

</ln>

<ln l="1651" t="9432" r="5842" b="9619" baseLine="9566">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1651" t="9432" r="2040" b="9576">shelf</wd>

<space/>

<wd l="2083" t="9432" r="2798" b="9619">language</wd>

<space/>

<wd l="2851" t="9432" r="3907" b="9576">identification</wd>

<space/>

<wd l="3955" t="9432" r="4306" b="9576">tool.</wd>

<space/>

<wd l="4397" t="9437" r="4565" b="9571">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="4613" t="9432" r="5602" b="9614">Proceedings</wd>

<space/>

<wd l="5659" t="9432" r="5842" b="9614">of</wd>

<space/>

</run>

</ln>

<ln l="1646" t="9653" r="5798" b="9835" baseLine="9787" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1646" t="9653" r="1882" b="9797">the</wd>

<space/>

<wd l="1930" t="9653" r="2280" b="9797">50th</wd>

<space/>

<wd l="2309" t="9653" r="2904" b="9797">Annual</wd>

<space/>

<wd l="2938" t="9658" r="3590" b="9835">Meeting</wd>

<space/>

<wd l="3643" t="9653" r="3821" b="9835">of</wd>

<space/>

<wd l="3840" t="9653" r="4075" b="9797">the</wd>

<space/>

<wd l="4104" t="9658" r="5045" b="9797">Association</wd>

<space/>

<wd l="5059" t="9653" r="5333" b="9835">for</wd>

<space/>

<wd l="5376" t="9658" r="5798" b="9797">Com-</wd>

</ln>

<ln l="1622" t="9869" r="3965" b="10051" baseLine="10008">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1622" t="9869" r="2462" b="10051">putational</wd>

<space/>

<wd l="2506" t="9874" r="3384" b="10051">Linguistics</wd>

<space/>

</run>

<wd l="3442" t="9874" r="3965" b="10046"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(ACL)</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

<para l="1445" t="10248" r="5808" b="11093" alignment="justified" li="144" spaceBefore="157" fli="-144" lsp="exactly" lspExact="220" language="en">

<ln l="1445" t="10248" r="5798" b="10416" baseLine="10387" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="10253" r="1627" b="10392">A.</wd>

<space/>

<wd l="1714" t="10248" r="2203" b="10416">Ritter,</wd>

<space/>

<wd l="2299" t="10248" r="2443" b="10392">S.</wd>

<space/>

<wd l="2539" t="10248" r="3019" b="10416">Clark,</wd>

<space/>

<wd l="3110" t="10253" r="3840" b="10416">Mausam,</wd>

<space/>

<wd l="3941" t="10248" r="4224" b="10392">and</wd>

<space/>

<wd l="4306" t="10248" r="4483" b="10392">O.</wd>

<space/>

<wd l="4570" t="10248" r="5184" b="10392">Etzioni.</wd>

<space/>

<wd l="5366" t="10248" r="5798" b="10392">2011.</wd>

<space/>

</ln>

<ln l="1642" t="10469" r="5798" b="10656" baseLine="10603" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="10469" r="2218" b="10613">Named</wd>

<space/>

<wd l="2280" t="10469" r="2726" b="10656">entity</wd>

<space/>

<wd l="2784" t="10469" r="3696" b="10656">recognition</wd>

<space/>

<wd l="3749" t="10469" r="3907" b="10608">in</wd>

<space/>

<wd l="3960" t="10488" r="4512" b="10613">tweets:</wd>

<space/>

<wd l="4603" t="10474" r="4848" b="10608">An</wd>

<space/>

<wd l="4910" t="10469" r="5798" b="10651">experimen-</wd>

</ln>

<ln l="1642" t="10685" r="5808" b="10872" baseLine="10824">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1642" t="10685" r="1838" b="10829">tal</wd>

<space/>

<wd l="1901" t="10685" r="2347" b="10872">study.</wd>

<space/>

<wd l="2429" t="10690" r="2597" b="10824">In</wd>

<space/>

</run>

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="2645" t="10690" r="3053" b="10829">Proc.</wd>

<space/>

<wd l="3130" t="10685" r="3312" b="10867">of</wd>

<space/>

<wd l="3322" t="10685" r="4128" b="10867">Empirical</wd>

<space/>

<wd l="4171" t="10685" r="4858" b="10829">Methods</wd>

<space/>

<wd l="4877" t="10685" r="5150" b="10867">for</wd>

<space/>

<wd l="5184" t="10685" r="5808" b="10829">Natural</wd>

<space/>

</run>

</ln>

<ln l="1637" t="10906" r="5558" b="11093" baseLine="11045">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1637" t="10910" r="2434" b="11088">Language</wd>

<space/>

<wd l="2486" t="10910" r="3360" b="11088">Processing</wd>

<space/>

</run>

<wd l="3418" t="10910" r="4238" b="11083"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">(EMNLP)</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="4296" t="10906" r="5174" b="11093">Edinburgh,</wd>

<space/>

<wd l="5232" t="10910" r="5558" b="11050">UK.</wd>

</run>

</ln>

</para>

<para l="1445" t="11285" r="5813" b="12350" alignment="justified" li="144" spaceBefore="152" fli="-144" lsp="exactly" lspExact="220" language="en">

<ln l="1445" t="11285" r="5803" b="11472" baseLine="11419" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1445" t="11290" r="1627" b="11429">A.</wd>

<space/>

<wd l="1723" t="11285" r="2194" b="11429">Smith</wd>

<space/>

<wd l="2275" t="11285" r="2563" b="11429">and</wd>

<space/>

<wd l="2640" t="11290" r="2856" b="11429">M.</wd>

<space/>

<wd l="2952" t="11285" r="3662" b="11429">Osborne.</wd>

<space/>

<wd l="3845" t="11285" r="4277" b="11429">2006.</wd>

<space/>

<wd l="4454" t="11285" r="4930" b="11472">Using</wd>

<space/>

<wd l="5016" t="11304" r="5803" b="11472">gazetteers</wd>

<space/>

</ln>

<ln l="1642" t="11506" r="5798" b="11650" baseLine="11640">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1642" t="11506" r="1800" b="11645">in</wd>

<space/>

<wd l="1891" t="11506" r="3019" b="11650">discriminative</wd>

<space/>

<wd l="3110" t="11506" r="4056" b="11650">information</wd>

<space/>

<wd l="4152" t="11506" r="4978" b="11650">extraction.</wd>

<space/>

<wd l="5194" t="11510" r="5362" b="11645">In</wd>

<space/>

</run>

<wd l="5453" t="11510" r="5798" b="11650" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">Pro-</wd>

</ln>

<ln l="1646" t="11722" r="5813" b="11904" baseLine="11861" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1646" t="11722" r="2338" b="11904">ceedings</wd>

<space/>

<wd l="2400" t="11722" r="2582" b="11904">of</wd>

<space/>

<wd l="2606" t="11722" r="2846" b="11866">the</wd>

<space/>

<wd l="2914" t="11722" r="3336" b="11866">Tenth</wd>

<space/>

<wd l="3403" t="11722" r="4301" b="11904">Conference</wd>

<space/>

<wd l="4363" t="11770" r="4555" b="11866">on</wd>

<space/>

<wd l="4622" t="11722" r="5813" b="11904">Computational</wd>

<space/>

</ln>

<ln l="1637" t="11942" r="5808" b="12130" baseLine="12077">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><wd l="1637" t="11942" r="2261" b="12086">Natural</wd>

<space/>

<wd l="2299" t="11947" r="3091" b="12125">Language</wd>

<space/>

<wd l="3134" t="11947" r="3864" b="12125">Learning</wd>

<space/>

</run>

<wd l="3917" t="11947" r="4862" b="12120"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">(CoNLL-X)</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1"><space/>

<wd l="4915" t="11986" r="5362" b="12130">pages</wd>

<space/>

<wd l="5429" t="11942" r="5808" b="12086">133–</wd>

<space/>

</run>

</ln>

<ln l="1661" t="12163" r="5472" b="12350" baseLine="12298" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="-1">

<wd l="1661" t="12163" r="1978" b="12307">140.</wd>

<space/>

<wd l="2040" t="12163" r="2981" b="12307">Association</wd>

<space/>

<wd l="3034" t="12163" r="3264" b="12307">for</wd>

<space/>

<wd l="3317" t="12163" r="4498" b="12346">Computational</wd>

<space/>

<wd l="4546" t="12163" r="5472" b="12350">Linguistics.</wd>

</ln>

</para>

<para l="1445" t="12542" r="5808" b="13781" alignment="justified" li="144" spaceBefore="153" fli="-144" lsp="exactly" lspExact="220" language="en">

<ln l="1445" t="12542" r="5808" b="12730" baseLine="12677" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="12547" r="1627" b="12686">H.</wd>

<space/>

<wd l="1704" t="12542" r="2227" b="12710">Tissot,</wd>

<space/>

<wd l="2304" t="12547" r="2486" b="12686">A.</wd>

<space/>

<wd l="2563" t="12542" r="3226" b="12710">Roberts,</wd>

<space/>

<wd l="3302" t="12547" r="3461" b="12686">L.</wd>

<space/>

<wd l="3538" t="12542" r="4488" b="12730">Derczynski,</wd>

<space/>

<wd l="4570" t="12542" r="4747" b="12686">G.</wd>

<space/>

<wd l="4829" t="12542" r="5443" b="12710">Gorrell,</wd>

<space/>

<wd l="5525" t="12542" r="5808" b="12686">and</wd>

<space/>

</ln>

<ln l="1642" t="12758" r="5808" b="12946" baseLine="12898" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="12763" r="1858" b="12902">M.</wd>

<space/>

<wd l="1925" t="12758" r="2573" b="12902">Didonet</wd>

<space/>

<wd l="2626" t="12758" r="2909" b="12902">Del</wd>

<space/>

<wd l="2971" t="12758" r="3470" b="12902">Fabro.</wd>

<space/>

<wd l="3581" t="12758" r="4013" b="12902">2015.</wd>

<space/>

<wd l="4123" t="12758" r="4814" b="12946">Analysis</wd>

<space/>

<wd l="4882" t="12758" r="5054" b="12902">of</wd>

<space/>

<wd l="5098" t="12758" r="5808" b="12941">temporal</wd>

<space/>

</ln>

<ln l="1646" t="12979" r="5798" b="13162" baseLine="13114">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1646" t="12979" r="2563" b="13162">expressions</wd>

<space/>

<wd l="2630" t="12979" r="3398" b="13123">annotated</wd>

<space/>

<wd l="3456" t="12979" r="3610" b="13118">in</wd>

<space/>

<wd l="3667" t="12979" r="4248" b="13123">clinical</wd>

<space/>

<wd l="4306" t="12998" r="4766" b="13123">notes.</wd>

<space/>

<wd l="4867" t="12984" r="5030" b="13118">In</wd>

<space/>

</run>

<wd l="5088" t="12979" r="5798" b="13123" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Proceed-</wd>

</ln>

<ln l="1651" t="13200" r="5798" b="13382" baseLine="13334" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1651" t="13205" r="1973" b="13382">ings</wd>

<space/>

<wd l="2035" t="13200" r="2218" b="13382">of</wd>

<space/>

<wd l="2251" t="13200" r="2592" b="13344">11th</wd>

<space/>

<wd l="2650" t="13205" r="3058" b="13344">Joint</wd>

<space/>

<wd l="3091" t="13205" r="3835" b="13344">ACL-ISO</wd>

<space/>

<wd l="3907" t="13200" r="4685" b="13382">Workshop</wd>

<space/>

<wd l="4752" t="13248" r="4939" b="13344">on</wd>

<space/>

<wd l="4997" t="13205" r="5798" b="13382">Interoper-</wd>

</ln>

<ln l="1642" t="13416" r="5803" b="13603" baseLine="13555">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1642" t="13416" r="1982" b="13560">able</wd>

<space/>

<wd l="2059" t="13421" r="2789" b="13560">Semantic</wd>

<space/>

</run>

<wd l="2851" t="13421" r="3792" b="13584"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Annotation</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">,</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><space/>

<wd l="3883" t="13459" r="4330" b="13603">pages</wd>

<space/>

<wd l="4416" t="13416" r="5050" b="13584">93–102,</wd>

<space/>

<wd l="5141" t="13416" r="5803" b="13584">London,</wd>

<space/>

</run>

</ln>

<ln l="1642" t="13637" r="2458" b="13781" baseLine="13771" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1642" t="13642" r="1968" b="13781">UK.</wd>

<space/>

<wd l="2030" t="13637" r="2458" b="13781">ACL.</wd>

</ln>

</para>

<para l="1445" t="14016" r="5808" b="14635" alignment="justified" li="144" spaceBefore="157" spaceAfter="1329" fli="-144" lsp="exactly" lspExact="220" language="en">

<ln l="1445" t="14016" r="5808" b="14160" baseLine="14150" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">

<wd l="1445" t="14021" r="1627" b="14160">D.</wd>

<space/>

<wd l="1709" t="14016" r="2525" b="14160">Vrandeˇci´c</wd>

<space/>

<wd l="2606" t="14016" r="2890" b="14160">and</wd>

<space/>

<wd l="2962" t="14021" r="3178" b="14160">M.</wd>

<space/>

<wd l="3269" t="14016" r="4022" b="14160">Kr¨otzsch.</wd>

<space/>

<wd l="4186" t="14016" r="4618" b="14160">2014.</wd>

<space/>

<wd l="4776" t="14016" r="5539" b="14160">Wikidata:</wd>

<space/>

<wd l="5664" t="14021" r="5808" b="14155">A</wd>

<space/>

</ln>

<ln l="1642" t="14232" r="5798" b="14419" baseLine="14371">

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1642" t="14237" r="1992" b="14376">Free</wd>

<space/>

<wd l="2074" t="14232" r="3144" b="14376">Collaborative</wd>

<space/>

<wd l="3221" t="14232" r="4128" b="14419">Knowledge</wd>

<space/>

<wd l="4200" t="14237" r="4627" b="14376">Base.</wd>

<space/>

</run>

<wd l="4790" t="14237" r="5798" b="14376" italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">Communica-</wd>

</ln>

<ln l="1646" t="14453" r="3029" b="14635" baseLine="14587">

<run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><wd l="1646" t="14458" r="2030" b="14597">tions</wd>

<space/>

<wd l="2083" t="14453" r="2266" b="14635">of</wd>

<space/>

<wd l="2290" t="14453" r="2525" b="14597">the</wd>

<space/>

</run>

<wd l="2563" t="14458" r="3029" b="14597"><run italic="true" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">ACM</run>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0">.</run>

</wd>

<run underlined="none" subsuperscript="none" fontSize="1000" fontFace="Times New Roman" fontFamily="roman" fontPitch="variable" spacing="0"><nl orig="true"/>

</run>

</ln>

</para>

</column>

</section>

<dd l="5865" t="15736" r="6171" b="15980">

<para l="5865" t="15787" r="6138" b="15946" alignment="left" spaceBefore="4" lsp="exactly" lspExact="229" language="en">

<ln l="5870" t="15787" r="6072" b="15946" baseLine="15936" underlined="none" subsuperscript="none" fontSize="1000" fontFace="Tahoma" fontFamily="swiss" fontPitch="variable" spacing="-9">

<wd l="5870" t="15787" r="6072" b="15946">53</wd>

</ln>

</para>

</dd>

</body>

</page>

</document>

