@inproceedings{zhou-etal-2024-chinese,
title = "{C}hinese Spoken Named Entity Recognition in Real-world Scenarios: Dataset and Approaches",
author = "Zhou, Shilin and
Li, Zhenghua and
Gong, Chen and
Zhang, Lei and
Hong, Yu and
Zhang, Min",
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2024",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-acl.111",
doi = "10.18653/v1/2024.findings-acl.111",
pages = "1872--1884",
abstract = "Spoken Named Entity Recognition (NER) aims to extract entities from speech. The extracted entities can help voice assistants better understand user{'}s questions and instructions. However, current Chinese Spoken NER datasets are laboratory-controlled data that are collected by reading existing texts in quiet environments, rather than natural spoken data, and the texts used for reading are also limited in topics. These limitations obstruct the development of Spoken NER in more natural and common real-world scenarios. To address this gap, we introduce a real-world Chinese Spoken NER dataset (RWCS-NER), encompassing open-domain daily conversations and task-oriented intelligent cockpit instructions. We compare several mainstream pipeline approaches on RWCS-NER. The results indicate that the current methods, affected by Automatic Speech Recognition (ASR) errors, do not perform satisfactorily in real settings. Aiming to enhance Spoken NER in real-world scenarios, we propose two approaches: self-training-asr and mapping then distilling (MDistilling). Experiments show that both approaches can achieve significant improvements, particularly MDistilling. Even compared with GPT4.0, MDistilling still reaches better results. We believe that our work will advance the field of Spoken NER in real-world settings.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhou-etal-2024-chinese">
<titleInfo>
<title>Chinese Spoken Named Entity Recognition in Real-world Scenarios: Dataset and Approaches</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shilin</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhenghua</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chen</namePart>
<namePart type="family">Gong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lei</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Hong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Srikumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Spoken Named Entity Recognition (NER) aims to extract entities from speech. The extracted entities can help voice assistants better understand user’s questions and instructions. However, current Chinese Spoken NER datasets are laboratory-controlled data that are collected by reading existing texts in quiet environments, rather than natural spoken data, and the texts used for reading are also limited in topics. These limitations obstruct the development of Spoken NER in more natural and common real-world scenarios. To address this gap, we introduce a real-world Chinese Spoken NER dataset (RWCS-NER), encompassing open-domain daily conversations and task-oriented intelligent cockpit instructions. We compare several mainstream pipeline approaches on RWCS-NER. The results indicate that the current methods, affected by Automatic Speech Recognition (ASR) errors, do not perform satisfactorily in real settings. Aiming to enhance Spoken NER in real-world scenarios, we propose two approaches: self-training-asr and mapping then distilling (MDistilling). Experiments show that both approaches can achieve significant improvements, particularly MDistilling. Even compared with GPT4.0, MDistilling still reaches better results. We believe that our work will advance the field of Spoken NER in real-world settings.</abstract>
<identifier type="citekey">zhou-etal-2024-chinese</identifier>
<identifier type="doi">10.18653/v1/2024.findings-acl.111</identifier>
<location>
<url>https://aclanthology.org/2024.findings-acl.111</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>1872</start>
<end>1884</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Chinese Spoken Named Entity Recognition in Real-world Scenarios: Dataset and Approaches
%A Zhou, Shilin
%A Li, Zhenghua
%A Gong, Chen
%A Zhang, Lei
%A Hong, Yu
%A Zhang, Min
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Findings of the Association for Computational Linguistics: ACL 2024
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F zhou-etal-2024-chinese
%X Spoken Named Entity Recognition (NER) aims to extract entities from speech. The extracted entities can help voice assistants better understand user’s questions and instructions. However, current Chinese Spoken NER datasets are laboratory-controlled data that are collected by reading existing texts in quiet environments, rather than natural spoken data, and the texts used for reading are also limited in topics. These limitations obstruct the development of Spoken NER in more natural and common real-world scenarios. To address this gap, we introduce a real-world Chinese Spoken NER dataset (RWCS-NER), encompassing open-domain daily conversations and task-oriented intelligent cockpit instructions. We compare several mainstream pipeline approaches on RWCS-NER. The results indicate that the current methods, affected by Automatic Speech Recognition (ASR) errors, do not perform satisfactorily in real settings. Aiming to enhance Spoken NER in real-world scenarios, we propose two approaches: self-training-asr and mapping then distilling (MDistilling). Experiments show that both approaches can achieve significant improvements, particularly MDistilling. Even compared with GPT4.0, MDistilling still reaches better results. We believe that our work will advance the field of Spoken NER in real-world settings.
%R 10.18653/v1/2024.findings-acl.111
%U https://aclanthology.org/2024.findings-acl.111
%U https://doi.org/10.18653/v1/2024.findings-acl.111
%P 1872-1884
Markdown (Informal)
[Chinese Spoken Named Entity Recognition in Real-world Scenarios: Dataset and Approaches](https://aclanthology.org/2024.findings-acl.111) (Zhou et al., Findings 2024)
ACL