% Conference paper in the CCL 2021 proceedings; ACL Anthology record 2021.ccl-1.8.
% The title is bilingual: the Chinese title followed by its English translation
% in parentheses. "Chinese" is brace-protected as a whole word so sentence-casing
% styles keep its capital without splitting the word mid-token.
% NOTE(review): `address` appears to hold the conference venue rather than the
% publisher's city -- confirm before relying on it as a publisher location.
@inproceedings{zong-etal-2021-ji,
  title     = {基于双编码器的医学文本中文分词({Chinese} word segmentation of medical text based on dual-encoder)},
  author    = {Zong, Yuan and
               Chang, Baobao},
  editor    = {Li, Sheng and
               Sun, Maosong and
               Liu, Yang and
               Wu, Hua and
               Liu, Kang and
               Che, Wanxiang and
               He, Shizhu and
               Rao, Gaoqi},
  booktitle = {Proceedings of the 20th Chinese National Conference on Computational Linguistics},
  month     = aug,
  year      = {2021},
  address   = {Huhhot, China},
  publisher = {Chinese Information Processing Society of China},
  url       = {https://aclanthology.org/2021.ccl-1.8},
  pages     = {76--85},
  abstract  = {中文分词是自然语言处理领域的基础工作,然而前人的医学文本分词工作都只是直接套用通用分词的方法,而医学文本多专用术语的特点让分词系统需要对医学专用术语和医学文本中的非医学术语文本提供不同的分词粒度。本文提出了双编码器医学文本中文分词模型,利用辅助编码器为医学专有术语提供粗粒度表示。模型将需要粗粒度分词的医学专用术语和需要通用分词粒度的文本分开,在提升医学专用术语的分词能力的同时最大限度地避免了其粗粒度对于医学文本中通用文本分词的干扰。},
  language  = {Chinese},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper with citekey zong-etal-2021-ji. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="zong-etal-2021-ji">
    <!-- Paper title: Chinese title followed by its English translation. -->
    <titleInfo>
      <title>基于双编码器的医学文本中文分词(Chinese word segmentation of medical text based on dual-encoder)</title>
    </titleInfo>
    <!-- Authors of the paper, in order. -->
    <name type="personal">
      <namePart type="given">Yuan</namePart>
      <namePart type="family">Zong</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Baobao</namePart>
      <namePart type="family">Chang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <language>
      <languageTerm type="text">Chinese</languageTerm>
      <languageTerm type="code" authority="iso639-2b">chi</languageTerm>
    </language>
    <!-- Host item: the proceedings volume containing the paper, with its editors. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 20th Chinese National Conference on Computational Linguistics</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Sheng</namePart>
        <namePart type="family">Li</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Maosong</namePart>
        <namePart type="family">Sun</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yang</namePart>
        <namePart type="family">Liu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hua</namePart>
        <namePart type="family">Wu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kang</namePart>
        <namePart type="family">Liu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Wanxiang</namePart>
        <namePart type="family">Che</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shizhu</namePart>
        <namePart type="family">He</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Gaoqi</namePart>
        <namePart type="family">Rao</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Chinese Information Processing Society of China</publisher>
        <place>
          <placeTerm type="text">Huhhot, China</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>中文分词是自然语言处理领域的基础工作,然而前人的医学文本分词工作都只是直接套用通用分词的方法,而医学文本多专用术语的特点让分词系统需要对医学专用术语和医学文本中的非医学术语文本提供不同的分词粒度。本文提出了双编码器医学文本中文分词模型,利用辅助编码器为医学专有术语提供粗粒度表示。模型将需要粗粒度分词的医学专用术语和需要通用分词粒度的文本分开,在提升医学专用术语的分词能力的同时最大限度地避免了其粗粒度对于医学文本中通用文本分词的干扰。</abstract>
    <identifier type="citekey">zong-etal-2021-ji</identifier>
    <location>
      <url>https://aclanthology.org/2021.ccl-1.8</url>
    </location>
    <!-- Position within the host volume: issue date and page range. -->
    <part>
      <date>2021-08</date>
      <extent unit="page">
        <start>76</start>
        <end>85</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T 基于双编码器的医学文本中文分词(Chinese word segmentation of medical text based on dual-encoder)
%A Zong, Yuan
%A Chang, Baobao
%Y Li, Sheng
%Y Sun, Maosong
%Y Liu, Yang
%Y Wu, Hua
%Y Liu, Kang
%Y Che, Wanxiang
%Y He, Shizhu
%Y Rao, Gaoqi
%S Proceedings of the 20th Chinese National Conference on Computational Linguistics
%D 2021
%8 August
%I Chinese Information Processing Society of China
%C Huhhot, China
%G Chinese
%F zong-etal-2021-ji
%X 中文分词是自然语言处理领域的基础工作,然而前人的医学文本分词工作都只是直接套用通用分词的方法,而医学文本多专用术语的特点让分词系统需要对医学专用术语和医学文本中的非医学术语文本提供不同的分词粒度。本文提出了双编码器医学文本中文分词模型,利用辅助编码器为医学专有术语提供粗粒度表示。模型将需要粗粒度分词的医学专用术语和需要通用分词粒度的文本分开,在提升医学专用术语的分词能力的同时最大限度地避免了其粗粒度对于医学文本中通用文本分词的干扰。
%U https://aclanthology.org/2021.ccl-1.8
%P 76-85
Markdown (Informal)
[基于双编码器的医学文本中文分词(Chinese word segmentation of medical text based on dual-encoder)](https://aclanthology.org/2021.ccl-1.8) (Zong & Chang, CCL 2021)
ACL