% Conference paper from the ICON 2020 proceedings (pp. 208--212); the canonical record is the address in the url field.
% Abstract cleaned up: the extraction artifact "time- consuming" is rejoined and the final sentence is terminated.
@inproceedings{jeong-etal-2020-constructing,
    title = "Constructing a {Korean} Named Entity Recognition Dataset for the Financial Domain using Active Learning",
    author = "Jeong, Dong-Ho and
      Heo, Min-Kang and
      Kim, Hyung-Chul and
      Park, Sang-Won",
    editor = "Bhattacharyya, Pushpak and
      Sharma, Dipti Misra and
      Sangal, Rajeev",
    booktitle = "Proceedings of the 17th International Conference on Natural Language Processing (ICON)",
    month = dec,
    year = "2020",
    address = "Indian Institute of Technology Patna, Patna, India",
    publisher = "NLP Association of India (NLPAI)",
    url = "https://aclanthology.org/2020.icon-main.27",
    pages = "208--212",
    abstract = "The performance of deep learning models depends on the quality and quantity of data. Data construction, however, is time-consuming and costly. In addition, when expert domain data are constructed, the availability of experts is limited. In such cases, active learning can efficiently increase the performance of the learning models with minimal data construction. Although various datasets have been constructed using active learning techniques, vigorous studies on the construction of Korean data on expert domains are yet to be conducted. In this study, a corpus for named entity recognition was constructed for the financial domain using the active learning technique. The contributions of the study are as follows. (1) It was verified that the active learning technique could effectively construct the named entity recognition corpus for the financial domain, and (2) a named entity recognizer for the financial domain was developed. Data of 8,043 sentences were constructed using the proposed method, and the performance of the named entity recognizer reached 80.84{\%}. Moreover, the proposed method reduced data construction costs by 12{--}25{\%}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jeong-etal-2020-constructing">
<titleInfo>
<title>Constructing a Korean Named Entity Recognition Dataset for the Financial Domain using Active Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dong-Ho</namePart>
<namePart type="family">Jeong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Kang</namePart>
<namePart type="family">Heo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hyung-Chul</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sang-Won</namePart>
<namePart type="family">Park</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Conference on Natural Language Processing (ICON)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharyya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dipti</namePart>
<namePart type="given">Misra</namePart>
<namePart type="family">Sharma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rajeev</namePart>
<namePart type="family">Sangal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>NLP Association of India (NLPAI)</publisher>
<place>
<placeTerm type="text">Indian Institute of Technology Patna, Patna, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The performance of deep learning models depends on the quality and quantity of data. Data construction, however, is time-consuming and costly. In addition, when expert domain data are constructed, the availability of experts is limited. In such cases, active learning can efficiently increase the performance of the learning models with minimal data construction. Although various datasets have been constructed using active learning techniques, vigorous studies on the construction of Korean data on expert domains are yet to be conducted. In this study, a corpus for named entity recognition was constructed for the financial domain using the active learning technique. The contributions of the study are as follows. (1) It was verified that the active learning technique could effectively construct the named entity recognition corpus for the financial domain, and (2) a named entity recognizer for the financial domain was developed. Data of 8,043 sentences were constructed using the proposed method, and the performance of the named entity recognizer reached 80.84%. Moreover, the proposed method reduced data construction costs by 12–25%.</abstract>
<identifier type="citekey">jeong-etal-2020-constructing</identifier>
<location>
<url>https://aclanthology.org/2020.icon-main.27</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>208</start>
<end>212</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Constructing a Korean Named Entity Recognition Dataset for the Financial Domain using Active Learning
%A Jeong, Dong-Ho
%A Heo, Min-Kang
%A Kim, Hyung-Chul
%A Park, Sang-Won
%Y Bhattacharyya, Pushpak
%Y Sharma, Dipti Misra
%Y Sangal, Rajeev
%S Proceedings of the 17th International Conference on Natural Language Processing (ICON)
%D 2020
%8 December
%I NLP Association of India (NLPAI)
%C Indian Institute of Technology Patna, Patna, India
%F jeong-etal-2020-constructing
%X The performance of deep learning models depends on the quality and quantity of data. Data construction, however, is time-consuming and costly. In addition, when expert domain data are constructed, the availability of experts is limited. In such cases, active learning can efficiently increase the performance of the learning models with minimal data construction. Although various datasets have been constructed using active learning techniques, vigorous studies on the construction of Korean data on expert domains are yet to be conducted. In this study, a corpus for named entity recognition was constructed for the financial domain using the active learning technique. The contributions of the study are as follows. (1) It was verified that the active learning technique could effectively construct the named entity recognition corpus for the financial domain, and (2) a named entity recognizer for the financial domain was developed. Data of 8,043 sentences were constructed using the proposed method, and the performance of the named entity recognizer reached 80.84%. Moreover, the proposed method reduced data construction costs by 12–25%.
%U https://aclanthology.org/2020.icon-main.27
%P 208-212
Markdown (Informal)
[Constructing a Korean Named Entity Recognition Dataset for the Financial Domain using Active Learning](https://aclanthology.org/2020.icon-main.27) (Jeong et al., ICON 2020)
ACL