@inproceedings{liu-etal-2023-ccl23-eval-ren-wu,
title = "{CCL}23-Eval 任务7赛道一系统报告:基于序列到序列模型的自动化文本纠错系统(System Report for {CCL}23-Eval Task 7 Track 1: Automated text error correction pipeline based on sequence-to-sequence models)",
author = "Liu, Shixuan and
Liu, Xinzhang and
Huang, Yuyao and
Wang, Chao and
Song, Yongshuang",
editor = "Sun, Maosong and
Qin, Bing and
Qiu, Xipeng and
Jiang, Jing and
Han, Xianpei",
booktitle = "Proceedings of the 22nd Chinese National Conference on Computational Linguistics (Volume 3: Evaluations)",
month = aug,
year = "2023",
address = "Harbin, China",
publisher = "Chinese Information Processing Society of China",
url = "https://aclanthology.org/2023.ccl-3.24",
pages = "213--219",
abstract = "{``}本文介绍了本队伍在CCL-2023汉语学习者文本纠错评测大赛赛道一中提交的参赛系统。近年来,大规模的中文预训练模型在各种任务上表现出色,而不同的预训练模型在特定任务上也各有优势。然而,由于汉语学习者文本纠错任务存在语法错误复杂和纠错语料稀缺等特点,因此采用基于序列标记的预训练文本纠错模型来解决问题是自然的选择。我们的团队采用了序列到序列的纠错模型,并采取了两阶段训练策略,设计了一套基于序列到序列文本纠错的pipeline。首先,我们对训练集数据进行了清洗处理;在第一阶段训练中,我们在训练集上使用数据增强技术;在第二阶段,我们利用验证集进行微调,并最终采用多个模型投票集成的方式完成后处理。在实际的系统测评中,我们提交的结果在封闭任务排行榜上超出baseline模型17.01分(40.59-{\textgreater}57.6)。{''}",
language = "Chinese",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="liu-etal-2023-ccl23-eval-ren-wu">
<titleInfo>
<title>CCL23-Eval 任务7赛道一系统报告:基于序列到序列模型的自动化文本纠错系统(System Report for CCL23-Eval Task 7 Track 1: Automated text error correction pipeline based on sequence-to-sequence models)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shixuan</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xinzhang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuyao</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chao</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yongshuang</namePart>
<namePart type="family">Song</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">Chinese</languageTerm>
<languageTerm type="code" authority="iso639-2b">chi</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 22nd Chinese National Conference on Computational Linguistics (Volume 3: Evaluations)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maosong</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bing</namePart>
<namePart type="family">Qin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xipeng</namePart>
<namePart type="family">Qiu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jing</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xianpei</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Chinese Information Processing Society of China</publisher>
<place>
<placeTerm type="text">Harbin, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>“本文介绍了本队伍在CCL-2023汉语学习者文本纠错评测大赛赛道一中提交的参赛系统。近年来,大规模的中文预训练模型在各种任务上表现出色,而不同的预训练模型在特定任务上也各有优势。然而,由于汉语学习者文本纠错任务存在语法错误复杂和纠错语料稀缺等特点,因此采用基于序列标记的预训练文本纠错模型来解决问题是自然的选择。我们的团队采用了序列到序列的纠错模型,并采取了两阶段训练策略,设计了一套基于序列到序列文本纠错的pipeline。首先,我们对训练集数据进行了清洗处理;在第一阶段训练中,我们在训练集上使用数据增强技术;在第二阶段,我们利用验证集进行微调,并最终采用多个模型投票集成的方式完成后处理。在实际的系统测评中,我们提交的结果在封闭任务排行榜上超出baseline模型17.01分(40.59-&gt;57.6)。”</abstract>
<identifier type="citekey">liu-etal-2023-ccl23-eval-ren-wu</identifier>
<location>
<url>https://aclanthology.org/2023.ccl-3.24</url>
</location>
<part>
<date>2023-08</date>
<extent unit="page">
<start>213</start>
<end>219</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CCL23-Eval 任务7赛道一系统报告:基于序列到序列模型的自动化文本纠错系统(System Report for CCL23-Eval Task 7 Track 1: Automated text error correction pipeline based on sequence-to-sequence models)
%A Liu, Shixuan
%A Liu, Xinzhang
%A Huang, Yuyao
%A Wang, Chao
%A Song, Yongshuang
%Y Sun, Maosong
%Y Qin, Bing
%Y Qiu, Xipeng
%Y Jiang, Jing
%Y Han, Xianpei
%S Proceedings of the 22nd Chinese National Conference on Computational Linguistics (Volume 3: Evaluations)
%D 2023
%8 August
%I Chinese Information Processing Society of China
%C Harbin, China
%G Chinese
%F liu-etal-2023-ccl23-eval-ren-wu
%X “本文介绍了本队伍在CCL-2023汉语学习者文本纠错评测大赛赛道一中提交的参赛系统。近年来,大规模的中文预训练模型在各种任务上表现出色,而不同的预训练模型在特定任务上也各有优势。然而,由于汉语学习者文本纠错任务存在语法错误复杂和纠错语料稀缺等特点,因此采用基于序列标记的预训练文本纠错模型来解决问题是自然的选择。我们的团队采用了序列到序列的纠错模型,并采取了两阶段训练策略,设计了一套基于序列到序列文本纠错的pipeline。首先,我们对训练集数据进行了清洗处理;在第一阶段训练中,我们在训练集上使用数据增强技术;在第二阶段,我们利用验证集进行微调,并最终采用多个模型投票集成的方式完成后处理。在实际的系统测评中,我们提交的结果在封闭任务排行榜上超出baseline模型17.01分(40.59->57.6)。”
%U https://aclanthology.org/2023.ccl-3.24
%P 213-219
Markdown (Informal)
[CCL23-Eval 任务7赛道一系统报告:基于序列到序列模型的自动化文本纠错系统(System Report for CCL23-Eval Task 7 Track 1: Automated text error correction pipeline based on sequence-to-sequence models)](https://aclanthology.org/2023.ccl-3.24) (Liu et al., CCL 2023)
ACL