@inproceedings{deng-etal-2023-tikem,
title = "{T}i{KEM}:基于知识增强的藏文预训练语言模型({T}i{KEM}: Knowledge Enhanced {T}ibetan Pre-trained Language Model)",
author = "Deng, Junjie and
Chen, Long and
Zhang, Yan and
Sun, Yuan and
Zhao, Xiaobin",
editor = "Sun, Maosong and
Qin, Bing and
Qiu, Xipeng and
Jiang, Jing and
Han, Xianpei",
booktitle = "Proceedings of the 22nd Chinese National Conference on Computational Linguistics",
month = aug,
year = "2023",
address = "Harbin, China",
publisher = "Chinese Information Processing Society of China",
url = "https://aclanthology.org/2023.ccl-1.12",
pages = "135--144",
abstract = "{``}预训练语言模型在中英文领域有着优异的表现,而低资源语言数据获取难度大,预训练语言模型在低资源语言如藏文上的研究刚取得初步进展。现有的藏文预训练语言模型,使用大规模无结构的文本语料库进行自监督学习,缺少外部知识指导,知识记忆能力和知识推理能力受限。为了解决以上问题,本文构建含有50万个三元组知识的藏文知识增强预训练数据集,联合结构化的知识表示和无结构化的文本表示,训练基于知识增强的藏文预训练语言模型TiKEM,以提高模型的知识记忆和推理能力。最后,本文在文本分类、实体关系分类和机器阅读理解三个下游任务中验证了模型的有效性。{''}",
language = "Chinese",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="deng-etal-2023-tikem">
<titleInfo>
<title>TiKEM:基于知识增强的藏文预训练语言模型(TiKEM: Knowledge Enhanced Tibetan Pre-trained Language Model)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Junjie</namePart>
<namePart type="family">Deng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Long</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yan</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">YUan</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaobin</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">Chinese</languageTerm>
<languageTerm type="code" authority="iso639-2b">chi</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 22nd Chinese National Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maosong</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bing</namePart>
<namePart type="family">Qin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xipeng</namePart>
<namePart type="family">Qiu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jing</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xianpei</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Chinese Information Processing Society of China</publisher>
<place>
<placeTerm type="text">Harbin, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Pre-trained language models perform excellently on Chinese and English, but data for low-resource languages is difficult to obtain, and research on pre-trained language models for low-resource languages such as Tibetan has only made initial progress. Existing Tibetan pre-trained language models perform self-supervised learning on large-scale unstructured text corpora; lacking the guidance of external knowledge, their knowledge memorization and knowledge reasoning abilities are limited. To address these problems, this paper constructs a Tibetan knowledge-enhanced pre-training dataset containing 500,000 knowledge triples and jointly models structured knowledge representations and unstructured text representations to train TiKEM, a knowledge-enhanced Tibetan pre-trained language model, improving the model's knowledge memorization and reasoning abilities. Finally, this paper verifies the effectiveness of the model on three downstream tasks: text classification, entity-relation classification, and machine reading comprehension.</abstract>
<identifier type="citekey">deng-etal-2023-tikem</identifier>
<location>
<url>https://aclanthology.org/2023.ccl-1.12</url>
</location>
<part>
<date>2023-08</date>
<extent unit="page">
<start>135</start>
<end>144</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T TiKEM:基于知识增强的藏文预训练语言模型(TiKEM: Knowledge Enhanced Tibetan Pre-trained Language Model)
%A Deng, Junjie
%A Chen, Long
%A Zhang, Yan
%A Sun, Yuan
%A Zhao, Xiaobin
%Y Sun, Maosong
%Y Qin, Bing
%Y Qiu, Xipeng
%Y Jiang, Jing
%Y Han, Xianpei
%S Proceedings of the 22nd Chinese National Conference on Computational Linguistics
%D 2023
%8 August
%I Chinese Information Processing Society of China
%C Harbin, China
%G Chinese
%F deng-etal-2023-tikem
%X Pre-trained language models perform excellently on Chinese and English, but data for low-resource languages is difficult to obtain, and research on pre-trained language models for low-resource languages such as Tibetan has only made initial progress. Existing Tibetan pre-trained language models perform self-supervised learning on large-scale unstructured text corpora; lacking the guidance of external knowledge, their knowledge memorization and knowledge reasoning abilities are limited. To address these problems, this paper constructs a Tibetan knowledge-enhanced pre-training dataset containing 500,000 knowledge triples and jointly models structured knowledge representations and unstructured text representations to train TiKEM, a knowledge-enhanced Tibetan pre-trained language model, improving the model's knowledge memorization and reasoning abilities. Finally, this paper verifies the effectiveness of the model on three downstream tasks: text classification, entity-relation classification, and machine reading comprehension.
%U https://aclanthology.org/2023.ccl-1.12
%P 135-144
Markdown (Informal)
[TiKEM:基于知识增强的藏文预训练语言模型(TiKEM: Knowledge Enhanced Tibetan Pre-trained Language Model)](https://aclanthology.org/2023.ccl-1.12) (Deng et al., CCL 2023)
ACL
Junjie Deng, Long Chen, Yan Zhang, Yuan Sun, and Xiaobin Zhao. 2023. TiKEM:基于知识增强的藏文预训练语言模型(TiKEM: Knowledge Enhanced Tibetan Pre-trained Language Model). In Proceedings of the 22nd Chinese National Conference on Computational Linguistics, pages 135–144, Harbin, China. Chinese Information Processing Society of China.