@inproceedings{hou-etal-2019-projecting,
title = "Projecting named entity recognizers without annotated or parallel corpora",
author = "Hou, Jue and
Koppatz, Maximilian and
Quecedo, Jos{\'e} Mar{\'\i}a Hoya and
Yangarber, Roman",
editor = "Hartmann, Mareike and
Plank, Barbara",
booktitle = "Proceedings of the 22nd Nordic Conference on Computational Linguistics",
month = sep # "{--}" # oct,
year = "2019",
address = "Turku, Finland",
publisher = {Link{\"o}ping University Electronic Press},
url = "https://aclanthology.org/W19-6124",
pages = "232--241",
abstract = "Named entity recognition (NER) is a well-researched task in the field of NLP, which typically requires large annotated corpora for training usable models. This is a problem for languages which lack large annotated corpora, such as Finnish. We propose an approach to create a named entity recognizer with no annotated or parallel documents, by leveraging strong NER models that exist for English. We automatically gather a large amount of \textit{chronologically matched} data in two languages, then project named entity annotations from the English documents onto the Finnish ones, by resolving the matches with limited linguistic rules. We use this {``}artificially{''} annotated data to train a BiLSTM-CRF model. Our results show that this method can produce annotated instances with high precision, and the resulting model achieves state-of-the-art performance.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hou-etal-2019-projecting">
<titleInfo>
<title>Projecting named entity recognizers without annotated or parallel corpora</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jue</namePart>
<namePart type="family">Hou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maximilian</namePart>
<namePart type="family">Koppatz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">José</namePart>
<namePart type="given">María</namePart>
<namePart type="given">Hoya</namePart>
<namePart type="family">Quecedo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roman</namePart>
<namePart type="family">Yangarber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-sep–oct</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 22nd Nordic Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mareike</namePart>
<namePart type="family">Hartmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="family">Plank</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Linköping University Electronic Press</publisher>
<place>
<placeTerm type="text">Turku, Finland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Named entity recognition (NER) is a well-researched task in the field of NLP, which typically requires large annotated corpora for training usable models. This is a problem for languages which lack large annotated corpora, such as Finnish. We propose an approach to create a named entity recognizer with no annotated or parallel documents, by leveraging strong NER models that exist for English. We automatically gather a large amount of chronologically matched data in two languages, then project named entity annotations from the English documents onto the Finnish ones, by resolving the matches with limited linguistic rules. We use this “artificially” annotated data to train a BiLSTM-CRF model. Our results show that this method can produce annotated instances with high precision, and the resulting model achieves state-of-the-art performance.</abstract>
<identifier type="citekey">hou-etal-2019-projecting</identifier>
<location>
<url>https://aclanthology.org/W19-6124</url>
</location>
<part>
<date>2019-sep–oct</date>
<extent unit="page">
<start>232</start>
<end>241</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Projecting named entity recognizers without annotated or parallel corpora
%A Hou, Jue
%A Koppatz, Maximilian
%A Quecedo, José María Hoya
%A Yangarber, Roman
%Y Hartmann, Mareike
%Y Plank, Barbara
%S Proceedings of the 22nd Nordic Conference on Computational Linguistics
%D 2019
%8 sep–oct
%I Linköping University Electronic Press
%C Turku, Finland
%F hou-etal-2019-projecting
%X Named entity recognition (NER) is a well-researched task in the field of NLP, which typically requires large annotated corpora for training usable models. This is a problem for languages which lack large annotated corpora, such as Finnish. We propose an approach to create a named entity recognizer with no annotated or parallel documents, by leveraging strong NER models that exist for English. We automatically gather a large amount of chronologically matched data in two languages, then project named entity annotations from the English documents onto the Finnish ones, by resolving the matches with limited linguistic rules. We use this “artificially” annotated data to train a BiLSTM-CRF model. Our results show that this method can produce annotated instances with high precision, and the resulting model achieves state-of-the-art performance.
%U https://aclanthology.org/W19-6124
%P 232-241
Markdown (Informal)
[Projecting named entity recognizers without annotated or parallel corpora](https://aclanthology.org/W19-6124) (Hou et al., NoDaLiDa 2019)
ACL