@inproceedings{chang-etal-2022-automatic,
title = "Automatic Word Segmentation and Part-of-Speech Tagging of {A}ncient {C}hinese Based on {BERT} Model",
author = "Chang, Yu and
Zhu, Peng and
Wang, Chaoping and
Wang, Chaofan",
editor = "Sprugnoli, Rachele and
Passarotti, Marco",
booktitle = "Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.lt4hala-1.20",
pages = "141--145",
abstract = "In recent years, new deep learning methods and pre-training language models have been emerging in the field of natural language processing (NLP). These methods and models can greatly improve the accuracy of automatic word segmentation and part-of-speech tagging in the field of ancient Chinese research. In these models, the BERT model has made amazing achievements in the top-level test of machine reading comprehension SQuAD-1.1. In addition, it also showed better results than other models in 11 different NLP tests. In this paper, SIKU-RoBERTa pre-training language model based on the high-quality full-text corpus of SiKuQuanShu have been adopted, and part corpus of ZuoZhuan that has been word segmented and part-of-speech tagged is used as training sets to build a deep network model based on BERT for word segmentation and POS tagging experiments. In addition, we also use other classical NLP network models for comparative experiments. The results show that using SIKU-RoBERTa pre-training language model, the overall prediction accuracy of word segmentation and part-of-speech tagging of this model can reach 93.87{\%} and 88.97{\%}, with excellent overall performance.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chang-etal-2022-automatic">
<titleInfo>
<title>Automatic Word Segmentation and Part-of-Speech Tagging of Ancient Chinese Based on BERT Model</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peng</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chaoping</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chaofan</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rachele</namePart>
<namePart type="family">Sprugnoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Passarotti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In recent years, new deep learning methods and pre-training language models have been emerging in the field of natural language processing (NLP). These methods and models can greatly improve the accuracy of automatic word segmentation and part-of-speech tagging in the field of ancient Chinese research. In these models, the BERT model has made amazing achievements in the top-level test of machine reading comprehension SQuAD-1.1. In addition, it also showed better results than other models in 11 different NLP tests. In this paper, SIKU-RoBERTa pre-training language model based on the high-quality full-text corpus of SiKuQuanShu have been adopted, and part corpus of ZuoZhuan that has been word segmented and part-of-speech tagged is used as training sets to build a deep network model based on BERT for word segmentation and POS tagging experiments. In addition, we also use other classical NLP network models for comparative experiments. The results show that using SIKU-RoBERTa pre-training language model, the overall prediction accuracy of word segmentation and part-of-speech tagging of this model can reach 93.87% and 88.97%, with excellent overall performance.</abstract>
<identifier type="citekey">chang-etal-2022-automatic</identifier>
<location>
<url>https://aclanthology.org/2022.lt4hala-1.20</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>141</start>
<end>145</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Automatic Word Segmentation and Part-of-Speech Tagging of Ancient Chinese Based on BERT Model
%A Chang, Yu
%A Zhu, Peng
%A Wang, Chaoping
%A Wang, Chaofan
%Y Sprugnoli, Rachele
%Y Passarotti, Marco
%S Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F chang-etal-2022-automatic
%X In recent years, new deep learning methods and pre-training language models have been emerging in the field of natural language processing (NLP). These methods and models can greatly improve the accuracy of automatic word segmentation and part-of-speech tagging in the field of ancient Chinese research. In these models, the BERT model has made amazing achievements in the top-level test of machine reading comprehension SQuAD-1.1. In addition, it also showed better results than other models in 11 different NLP tests. In this paper, SIKU-RoBERTa pre-training language model based on the high-quality full-text corpus of SiKuQuanShu have been adopted, and part corpus of ZuoZhuan that has been word segmented and part-of-speech tagged is used as training sets to build a deep network model based on BERT for word segmentation and POS tagging experiments. In addition, we also use other classical NLP network models for comparative experiments. The results show that using SIKU-RoBERTa pre-training language model, the overall prediction accuracy of word segmentation and part-of-speech tagging of this model can reach 93.87% and 88.97%, with excellent overall performance.
%U https://aclanthology.org/2022.lt4hala-1.20
%P 141-145
Markdown (Informal)
[Automatic Word Segmentation and Part-of-Speech Tagging of Ancient Chinese Based on BERT Model](https://aclanthology.org/2022.lt4hala-1.20) (Chang et al., LT4HALA 2022)
ACL