@inproceedings{peiyuan-etal-2024-mian,
title = "面向{``}以{A}为{B}{''}构式语义场景的汉语框架识别数据集构建{\ensuremath{\star}}(Dataset for Recognizing {C}hinese Semantic Frames based on the Semantic Scenario of the ``Yi A Wei {B}'' Construction)",
author = "Yang, Peiyuan and
Su, Xuefeng and
Li, Juncai and
Yan, Zhichao and
Chai, Qinghua and
Li, Ru",
editor = "Maosong, Sun and
Jiye, Liang and
Xianpei, Han and
Zhiyuan, Liu and
Yulan, He",
booktitle = "Proceedings of the 23rd Chinese National Conference on Computational Linguistics (Volume 1: Main Conference)",
month = jul,
year = "2024",
address = "Taiyuan, China",
publisher = "Chinese Information Processing Society of China",
url = "https://aclanthology.org/2024.ccl-1.63/",
pages = "807--818",
language = "zho",
abstract = "``汉语中普遍存在一些语义场景,其语义核心不是以单个词语呈现,而是通过句子中的某个特定结构来表达。然而当前公开发表的数据集中,只有极少数的数据集将这种特定结构作为语义单元进行研究。汉语框架语义知识库是进行汉语深层语义分析与推理的优质资源,目前其激活框架的基本单位均为句中的一个词。本文以汉语框架语义知识库为基础,引入构式语法,使用2020《人民日报》语料库,以{``}以A为B{''}构式为例,建立了基于{``}以A为B{''}构式的汉语框架识别数据集,包含23849条例句,相应框架141个。本文使用多个汉语框架识别模型及大语言模型在该数据集上进行了实验,并针对传统框架识别模型在以构式为目标词的框架识别任务中由于目标词信息匮乏导致的识别困难问题,提出了基于目标词转化和数据增强的两种方法,使模型准确率达到了88.19{\%},有效提升了模型挖掘构式蕴含的深层语义信息的能力。''"
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="peiyuan-etal-2024-mian">
<titleInfo>
<title>面向“以A为B”构式语义场景的汉语框架识别数据集构建⋆(Dataset for Recognizing Chinese Semantic Frames based on the Semantic Scenario of the “Yi A Wei B” Construction)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Peiyuan</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuefeng</namePart>
<namePart type="family">Su</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juncai</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhichao</namePart>
<namePart type="family">Yan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qinghua</namePart>
<namePart type="family">Chai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ru</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">zho</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 23rd Chinese National Conference on Computational Linguistics (Volume 1: Main Conference)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sun</namePart>
<namePart type="family">Maosong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liang</namePart>
<namePart type="family">Jiye</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Han</namePart>
<namePart type="family">Xianpei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liu</namePart>
<namePart type="family">Zhiyuan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">He</namePart>
<namePart type="family">Yulan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Chinese Information Processing Society of China</publisher>
<place>
<placeTerm type="text">Taiyuan, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>“汉语中普遍存在一些语义场景,其语义核心不是以单个词语呈现,而是通过句子中的某个特定结构来表达。然而当前公开发表的数据集中,只有极少数的数据集将这种特定结构作为语义单元进行研究。汉语框架语义知识库是进行汉语深层语义分析与推理的优质资源,目前其激活框架的基本单位均为句中的一个词。本文以汉语框架语义知识库为基础,引入构式语法,使用2020《人民日报》语料库,以“以A为B”构式为例,建立了基于“以A为B”构式的汉语框架识别数据集,包含23849条例句,相应框架141个。本文使用多个汉语框架识别模型及大语言模型在该数据集上进行了实验,并针对传统框架识别模型在以构式为目标词的框架识别任务中由于目标词信息匮乏导致的识别困难问题,提出了基于目标词转化和数据增强的两种方法,使模型准确率达到了88.19%,有效提升了模型挖掘构式蕴含的深层语义信息的能力。”</abstract>
<identifier type="citekey">peiyuan-etal-2024-mian</identifier>
<location>
<url>https://aclanthology.org/2024.ccl-1.63/</url>
</location>
<part>
<date>2024-07</date>
<extent unit="page">
<start>807</start>
<end>818</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T 面向“以A为B”构式语义场景的汉语框架识别数据集构建⋆(Dataset for Recognizing Chinese Semantic Frames based on the Semantic Scenario of the “Yi A Wei B” Construction)
%A Yang, Peiyuan
%A Su, Xuefeng
%A Li, Juncai
%A Yan, Zhichao
%A Chai, Qinghua
%A Li, Ru
%Y Maosong, Sun
%Y Jiye, Liang
%Y Xianpei, Han
%Y Zhiyuan, Liu
%Y Yulan, He
%S Proceedings of the 23rd Chinese National Conference on Computational Linguistics (Volume 1: Main Conference)
%D 2024
%8 July
%I Chinese Information Processing Society of China
%C Taiyuan, China
%G zho
%F peiyuan-etal-2024-mian
%X “汉语中普遍存在一些语义场景,其语义核心不是以单个词语呈现,而是通过句子中的某个特定结构来表达。然而当前公开发表的数据集中,只有极少数的数据集将这种特定结构作为语义单元进行研究。汉语框架语义知识库是进行汉语深层语义分析与推理的优质资源,目前其激活框架的基本单位均为句中的一个词。本文以汉语框架语义知识库为基础,引入构式语法,使用2020《人民日报》语料库,以“以A为B”构式为例,建立了基于“以A为B”构式的汉语框架识别数据集,包含23849条例句,相应框架141个。本文使用多个汉语框架识别模型及大语言模型在该数据集上进行了实验,并针对传统框架识别模型在以构式为目标词的框架识别任务中由于目标词信息匮乏导致的识别困难问题,提出了基于目标词转化和数据增强的两种方法,使模型准确率达到了88.19%,有效提升了模型挖掘构式蕴含的深层语义信息的能力。”
%U https://aclanthology.org/2024.ccl-1.63/
%P 807-818
Markdown (Informal)
[面向“以A为B”构式语义场景的汉语框架识别数据集构建⋆(Dataset for Recognizing Chinese Semantic Frames based on the Semantic Scenario of the “Yi A Wei B” Construction)](https://aclanthology.org/2024.ccl-1.63/) (Yang et al., CCL 2024)
ACL