% BibTeX record for https://aclanthology.org/2024.ccl-1.4/.
% Author and editor names are written in "Family, Given" form so BibTeX parses
% the surname correctly. The citation key is left unchanged so existing
% citations keep resolving.
@inproceedings{tingchao-etal-2024-mian,
    title = "面向{CQL}的语料库检索引擎的高效实现(Efficient Implementation of a {CQL}-oriented Corpus Retrieval Engine)",
    author = "Liu, Tingchao and
      Lu, Luming and
      Yang, Liner and
      Wang, Yu",
    editor = "Sun, Maosong and
      Liang, Jiye and
      Han, Xianpei and
      Liu, Zhiyuan and
      He, Yulan",
    booktitle = "Proceedings of the 23rd Chinese National Conference on Computational Linguistics (Volume 1: Main Conference)",
    month = jul,
    year = "2024",
    address = "Taiyuan, China",
    publisher = "Chinese Information Processing Society of China",
    url = "https://aclanthology.org/2024.ccl-1.4/",
    pages = "46--56",
    language = "zho",
    abstract = "``语料库检索工具在语言学研究领域具有举足轻重的地位,对于高效获取信息至关重要。然而,当前国内语料库检索工具在语料库检索语言上缺乏统一标准,尤其支持语料库查询语言(CQL)的中文语料库检索工具相对稀缺。在使用不同分词粒度的语料库工具进行中文语料库检索时,会遇到噪声或数据召回难问题。为应对这些挑战,我们研发了支持多粒度分词的CQL 解析器系统CAMELS:一款支持CQL 语句检索,且兼容多粒度分词,支持非词典词检索的语料库检索引擎。经过多种分词器的测试,该引擎展现出了优异的召回率,并在性能上超越了BlackLab的检索速度,为语言学工作者提供了更加易用、精准的检索工具。''"
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tingchao-etal-2024-mian">
<titleInfo>
<title>面向CQL的语料库检索引擎的高效实现(Efficient Implementation of a CQL-oriented Corpus Retrieval Engine)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tingchao</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luming</namePart>
<namePart type="family">Lu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liner</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">zho</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 23rd Chinese National Conference on Computational Linguistics (Volume 1: Main Conference)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maosong</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiye</namePart>
<namePart type="family">Liang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xianpei</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhiyuan</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yulan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Chinese Information Processing Society of China</publisher>
<place>
<placeTerm type="text">Taiyuan, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>“语料库检索工具在语言学研究领域具有举足轻重的地位,对于高效获取信息至关重要。然而,当前国内语料库检索工具在语料库检索语言上缺乏统一标准,尤其支持语料库查询语言(CQL)的中文语料库检索工具相对稀缺。在使用不同分词粒度的语料库工具进行中文语料库检索时,会遇到噪声或数据召回难问题。为应对这些挑战,我们研发了支持多粒度分词的CQL 解析器系统CAMELS:一款支持CQL 语句检索,且兼容多粒度分词,支持非词典词检索的语料库检索引擎。经过多种分词器的测试,该引擎展现出了优异的召回率,并在性能上超越了BlackLab的检索速度,为语言学工作者提供了更加易用、精准的检索工具。”</abstract>
<identifier type="citekey">tingchao-etal-2024-mian</identifier>
<location>
<url>https://aclanthology.org/2024.ccl-1.4/</url>
</location>
<part>
<date>2024-07</date>
<extent unit="page">
<start>46</start>
<end>56</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T 面向CQL的语料库检索引擎的高效实现(Efficient Implementation of a CQL-oriented Corpus Retrieval Engine)
%A Liu, Tingchao
%A Lu, Luming
%A Yang, Liner
%A Wang, Yu
%Y Sun, Maosong
%Y Liang, Jiye
%Y Han, Xianpei
%Y Liu, Zhiyuan
%Y He, Yulan
%S Proceedings of the 23rd Chinese National Conference on Computational Linguistics (Volume 1: Main Conference)
%D 2024
%8 July
%I Chinese Information Processing Society of China
%C Taiyuan, China
%G zho
%F tingchao-etal-2024-mian
%X “语料库检索工具在语言学研究领域具有举足轻重的地位,对于高效获取信息至关重要。然而,当前国内语料库检索工具在语料库检索语言上缺乏统一标准,尤其支持语料库查询语言(CQL)的中文语料库检索工具相对稀缺。在使用不同分词粒度的语料库工具进行中文语料库检索时,会遇到噪声或数据召回难问题。为应对这些挑战,我们研发了支持多粒度分词的CQL 解析器系统CAMELS:一款支持CQL 语句检索,且兼容多粒度分词,支持非词典词检索的语料库检索引擎。经过多种分词器的测试,该引擎展现出了优异的召回率,并在性能上超越了BlackLab的检索速度,为语言学工作者提供了更加易用、精准的检索工具。”
%U https://aclanthology.org/2024.ccl-1.4/
%P 46-56
Markdown (Informal)
[面向CQL的语料库检索引擎的高效实现(Efficient Implementation of a CQL-oriented Corpus Retrieval Engine)](https://aclanthology.org/2024.ccl-1.4/) (Liu et al., CCL 2024)
ACL