@inproceedings{hou-etal-2021-coarse,
title = "A Coarse-to-Fine Labeling Framework for Joint Word Segmentation, {POS} Tagging, and Constituent Parsing",
author = "Hou, Yang and
Zhou, Houquan and
Li, Zhenghua and
Zhang, Yu and
Zhang, Min and
Wang, Zhefeng and
Huai, Baoxing and
Yuan, Nicholas Jing",
editor = "Bisazza, Arianna and
Abend, Omri",
booktitle = "Proceedings of the 25th Conference on Computational Natural Language Learning",
month = nov,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.conll-1.23",
doi = "10.18653/v1/2021.conll-1.23",
pages = "290--299",
abstract = "The most straightforward approach to joint word segmentation (WS), part-of-speech (POS) tagging, and constituent parsing is converting a word-level tree into a char-level tree, which, however, leads to two severe challenges. First, a larger label set (e.g., {\mbox{$\geq$}} 600) and longer inputs both increase computational costs. Second, it is difficult to rule out illegal trees containing conflicting production rules, which is important for reliable model evaluation. If a POS tag (like VV) is above a phrase tag (like VP) in the output tree, it becomes quite complex to decide word boundaries. To deal with both challenges, this work proposes a two-stage coarse-to-fine labeling framework for joint WS-POS-PAR. In the coarse labeling stage, the joint model outputs a bracketed tree, in which each node corresponds to one of four labels (i.e., phrase, subphrase, word, subword). The tree is guaranteed to be legal via constrained CKY decoding. In the fine labeling stage, the model expands each coarse label into a final label (such as VP, VP*, VV, VV*). Experiments on Chinese Penn Treebank 5.1 and 7.0 show that our joint model consistently outperforms the pipeline approach on both settings of w/o and w/ BERT, and achieves new state-of-the-art performance.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hou-etal-2021-coarse">
<titleInfo>
<title>A Coarse-to-Fine Labeling Framework for Joint Word Segmentation, POS Tagging, and Constituent Parsing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Hou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Houquan</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhenghua</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhefeng</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Baoxing</namePart>
<namePart type="family">Huai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nicholas</namePart>
<namePart type="given">Jing</namePart>
<namePart type="family">Yuan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 25th Conference on Computational Natural Language Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Arianna</namePart>
<namePart type="family">Bisazza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Omri</namePart>
<namePart type="family">Abend</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The most straightforward approach to joint word segmentation (WS), part-of-speech (POS) tagging, and constituent parsing is converting a word-level tree into a char-level tree, which, however, leads to two severe challenges. First, a larger label set (e.g., \geq 600) and longer inputs both increase computational costs. Second, it is difficult to rule out illegal trees containing conflicting production rules, which is important for reliable model evaluation. If a POS tag (like VV) is above a phrase tag (like VP) in the output tree, it becomes quite complex to decide word boundaries. To deal with both challenges, this work proposes a two-stage coarse-to-fine labeling framework for joint WS-POS-PAR. In the coarse labeling stage, the joint model outputs a bracketed tree, in which each node corresponds to one of four labels (i.e., phrase, subphrase, word, subword). The tree is guaranteed to be legal via constrained CKY decoding. In the fine labeling stage, the model expands each coarse label into a final label (such as VP, VP*, VV, VV*). Experiments on Chinese Penn Treebank 5.1 and 7.0 show that our joint model consistently outperforms the pipeline approach on both settings of w/o and w/ BERT, and achieves new state-of-the-art performance.</abstract>
<identifier type="citekey">hou-etal-2021-coarse</identifier>
<identifier type="doi">10.18653/v1/2021.conll-1.23</identifier>
<location>
<url>https://aclanthology.org/2021.conll-1.23</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>290</start>
<end>299</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Coarse-to-Fine Labeling Framework for Joint Word Segmentation, POS Tagging, and Constituent Parsing
%A Hou, Yang
%A Zhou, Houquan
%A Li, Zhenghua
%A Zhang, Yu
%A Zhang, Min
%A Wang, Zhefeng
%A Huai, Baoxing
%A Yuan, Nicholas Jing
%Y Bisazza, Arianna
%Y Abend, Omri
%S Proceedings of the 25th Conference on Computational Natural Language Learning
%D 2021
%8 November
%I Association for Computational Linguistics
%C Online
%F hou-etal-2021-coarse
%X The most straightforward approach to joint word segmentation (WS), part-of-speech (POS) tagging, and constituent parsing is converting a word-level tree into a char-level tree, which, however, leads to two severe challenges. First, a larger label set (e.g., \geq 600) and longer inputs both increase computational costs. Second, it is difficult to rule out illegal trees containing conflicting production rules, which is important for reliable model evaluation. If a POS tag (like VV) is above a phrase tag (like VP) in the output tree, it becomes quite complex to decide word boundaries. To deal with both challenges, this work proposes a two-stage coarse-to-fine labeling framework for joint WS-POS-PAR. In the coarse labeling stage, the joint model outputs a bracketed tree, in which each node corresponds to one of four labels (i.e., phrase, subphrase, word, subword). The tree is guaranteed to be legal via constrained CKY decoding. In the fine labeling stage, the model expands each coarse label into a final label (such as VP, VP*, VV, VV*). Experiments on Chinese Penn Treebank 5.1 and 7.0 show that our joint model consistently outperforms the pipeline approach on both settings of w/o and w/ BERT, and achieves new state-of-the-art performance.
%R 10.18653/v1/2021.conll-1.23
%U https://aclanthology.org/2021.conll-1.23
%U https://doi.org/10.18653/v1/2021.conll-1.23
%P 290-299
Markdown (Informal)
[A Coarse-to-Fine Labeling Framework for Joint Word Segmentation, POS Tagging, and Constituent Parsing](https://aclanthology.org/2021.conll-1.23) (Hou et al., CoNLL 2021)
ACL
- Yang Hou, Houquan Zhou, Zhenghua Li, Yu Zhang, Min Zhang, Zhefeng Wang, Baoxing Huai, and Nicholas Jing Yuan. 2021. A Coarse-to-Fine Labeling Framework for Joint Word Segmentation, POS Tagging, and Constituent Parsing. In Proceedings of the 25th Conference on Computational Natural Language Learning, pages 290–299, Online. Association for Computational Linguistics.