@inproceedings{dao-etal-2022-disfluency,
title = "Disfluency Detection for {V}ietnamese",
author = "Dao, Mai Hoang and
Truong, Thinh Hung and
Nguyen, Dat Quoc",
booktitle = "Proceedings of the Eighth Workshop on Noisy User-generated Text (W-NUT 2022)",
month = oct,
year = "2022",
address = "Gyeongju, Republic of Korea",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.wnut-1.21/",
pages = "194--200",
abstract = "In this paper, we present the first empirical study for Vietnamese disfluency detection. To conduct this study, we first create a disfluency detection dataset for Vietnamese, with manual annotations over two disfluency types. We then empirically perform experiments using strong baseline models, and find that: automatic Vietnamese word segmentation improves the disfluency detection performances of the baselines, and the highest performance results are obtained by fine-tuning pre-trained language models in which the monolingual model PhoBERT for Vietnamese does better than the multilingual model XLM-R."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dao-etal-2022-disfluency">
<titleInfo>
<title>Disfluency Detection for Vietnamese</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mai</namePart>
<namePart type="given">Hoang</namePart>
<namePart type="family">Dao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thinh</namePart>
<namePart type="given">Hung</namePart>
<namePart type="family">Truong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dat</namePart>
<namePart type="given">Quoc</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth Workshop on Noisy User-generated Text (W-NUT 2022)</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gyeongju, Republic of Korea</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we present the first empirical study for Vietnamese disfluency detection. To conduct this study, we first create a disfluency detection dataset for Vietnamese, with manual annotations over two disfluency types. We then empirically perform experiments using strong baseline models, and find that: automatic Vietnamese word segmentation improves the disfluency detection performances of the baselines, and the highest performance results are obtained by fine-tuning pre-trained language models in which the monolingual model PhoBERT for Vietnamese does better than the multilingual model XLM-R.</abstract>
<identifier type="citekey">dao-etal-2022-disfluency</identifier>
<location>
<url>https://aclanthology.org/2022.wnut-1.21/</url>
</location>
<part>
<date>2022-10</date>
<extent unit="page">
<start>194</start>
<end>200</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Disfluency Detection for Vietnamese
%A Dao, Mai Hoang
%A Truong, Thinh Hung
%A Nguyen, Dat Quoc
%S Proceedings of the Eighth Workshop on Noisy User-generated Text (W-NUT 2022)
%D 2022
%8 October
%I Association for Computational Linguistics
%C Gyeongju, Republic of Korea
%F dao-etal-2022-disfluency
%X In this paper, we present the first empirical study for Vietnamese disfluency detection. To conduct this study, we first create a disfluency detection dataset for Vietnamese, with manual annotations over two disfluency types. We then empirically perform experiments using strong baseline models, and find that: automatic Vietnamese word segmentation improves the disfluency detection performances of the baselines, and the highest performance results are obtained by fine-tuning pre-trained language models in which the monolingual model PhoBERT for Vietnamese does better than the multilingual model XLM-R.
%U https://aclanthology.org/2022.wnut-1.21/
%P 194-200
Markdown (Informal)
[Disfluency Detection for Vietnamese](https://aclanthology.org/2022.wnut-1.21/) (Dao et al., WNUT 2022)
ACL
- Mai Hoang Dao, Thinh Hung Truong, and Dat Quoc Nguyen. 2022. Disfluency Detection for Vietnamese. In Proceedings of the Eighth Workshop on Noisy User-generated Text (W-NUT 2022), pages 194–200, Gyeongju, Republic of Korea. Association for Computational Linguistics.