% Workshop paper, BUCC 2021. Proper nouns and acronyms are brace-protected as whole words so sentence-casing styles keep their capitals.
@inproceedings{krishnan-etal-2021-employing,
    title     = {Employing {Wikipedia} as a resource for Named Entity Recognition in Morphologically complex under-resourced languages},
    author    = {Krishnan, Aravind and
                 Ziehe, Stefan and
                 Pannach, Franziska and
                 Sporleder, Caroline},
    editor    = {Rapp, Reinhard and
                 Sharoff, Serge and
                 Zweigenbaum, Pierre},
    booktitle = {Proceedings of the 14th Workshop on Building and Using Comparable Corpora ({BUCC} 2021)},
    month     = sep,
    year      = {2021},
    address   = {Online (Virtual Mode)},
    publisher = {INCOMA Ltd.},
    url       = {https://aclanthology.org/2021.bucc-1.5},
    pages     = {28--39},
    abstract  = {We propose a novel approach for rapid prototyping of named entity recognisers through the development of semi-automatically annotated datasets. We demonstrate the proposed pipeline on two under-resourced agglutinating languages: the Dravidian language Malayalam and the Bantu language isiZulu. Our approach is weakly supervised and bootstraps training data from Wikipedia and Google Knowledge Graph. Moreover, our approach is relatively language independent and can consequently be ported quickly (and hence cost-effectively) from one language to another, requiring only minor language-specific tailoring.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper with citekey krishnan-etal-2021-employing. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="krishnan-etal-2021-employing">
    <titleInfo>
      <title>Employing Wikipedia as a resource for Named Entity Recognition in Morphologically complex under-resourced languages</title>
    </titleInfo>
    <!-- Authors of the paper, in listed order. -->
    <name type="personal">
      <namePart type="given">Aravind</namePart>
      <namePart type="family">Krishnan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Stefan</namePart>
      <namePart type="family">Ziehe</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Franziska</namePart>
      <namePart type="family">Pannach</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Caroline</namePart>
      <namePart type="family">Sporleder</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume containing the paper, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 14th Workshop on Building and Using Comparable Corpora (BUCC 2021)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Reinhard</namePart>
        <namePart type="family">Rapp</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Serge</namePart>
        <namePart type="family">Sharoff</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Pierre</namePart>
        <namePart type="family">Zweigenbaum</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd.</publisher>
        <place>
          <placeTerm type="text">Online (Virtual Mode)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We propose a novel approach for rapid prototyping of named entity recognisers through the development of semi-automatically annotated datasets. We demonstrate the proposed pipeline on two under-resourced agglutinating languages: the Dravidian language Malayalam and the Bantu language isiZulu. Our approach is weakly supervised and bootstraps training data from Wikipedia and Google Knowledge Graph. Moreover, our approach is relatively language independent and can consequently be ported quickly (and hence cost-effectively) from one language to another, requiring only minor language-specific tailoring.</abstract>
    <identifier type="citekey">krishnan-etal-2021-employing</identifier>
    <location>
      <url>https://aclanthology.org/2021.bucc-1.5</url>
    </location>
    <!-- Issue date and page extent of the paper within the host volume. -->
    <part>
      <date>2021-09</date>
      <extent unit="page">
        <start>28</start>
        <end>39</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Employing Wikipedia as a resource for Named Entity Recognition in Morphologically complex under-resourced languages
%A Krishnan, Aravind
%A Ziehe, Stefan
%A Pannach, Franziska
%A Sporleder, Caroline
%Y Rapp, Reinhard
%Y Sharoff, Serge
%Y Zweigenbaum, Pierre
%S Proceedings of the 14th Workshop on Building and Using Comparable Corpora (BUCC 2021)
%D 2021
%8 September
%I INCOMA Ltd.
%C Online (Virtual Mode)
%F krishnan-etal-2021-employing
%X We propose a novel approach for rapid prototyping of named entity recognisers through the development of semi-automatically annotated datasets. We demonstrate the proposed pipeline on two under-resourced agglutinating languages: the Dravidian language Malayalam and the Bantu language isiZulu. Our approach is weakly supervised and bootstraps training data from Wikipedia and Google Knowledge Graph. Moreover, our approach is relatively language independent and can consequently be ported quickly (and hence cost-effectively) from one language to another, requiring only minor language-specific tailoring.
%U https://aclanthology.org/2021.bucc-1.5
%P 28-39
Markdown (Informal)
[Employing Wikipedia as a resource for Named Entity Recognition in Morphologically complex under-resourced languages](https://aclanthology.org/2021.bucc-1.5) (Krishnan et al., BUCC 2021)
ACL