@inproceedings{zhou-etal-2016-word,
title = "A Word Labeling Approach to {T}hai Sentence Boundary Detection and {POS} Tagging",
author = "Zhou, Nina and
Aw, AiTi and
Lertcheva, Nattadaporn and
Wang, Xuancong",
editor = "Matsumoto, Yuji and
Prasad, Rashmi",
booktitle = "Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: Technical Papers",
month = dec,
year = "2016",
address = "Osaka, Japan",
publisher = "The COLING 2016 Organizing Committee",
url = "https://aclanthology.org/C16-1031/",
pages = "319--327",
abstract = "Previous studies on Thai Sentence Boundary Detection (SBD) mostly assumed sentence ends at a space disambiguation problem, which classified space either as an indicator for Sentence Boundary (SB) or non-Sentence Boundary (nSB). In this paper, we propose a word labeling approach which treats space as a normal word, and detects SB between any two words. This removes the restriction for SB to be occurred only at space and makes our system more robust for modern Thai writing. It is because in modern Thai writing, space is not consistently used to indicate SB. As syntactic information contributes to better SBD, we further propose a joint Part-Of-Speech (POS) tagging and SBD framework based on Factorial Conditional Random Field (FCRF) model. We compare the performance of our proposed approach with reported methods on ORCHID corpus. We also performed experiments of FCRF model on the TaLAPi corpus. The results show that the word labelling approach has better performance than previous space-based classification approaches and FCRF joint model outperforms LCRF model in terms of SBD in all experiments."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhou-etal-2016-word">
<titleInfo>
<title>A Word Labeling Approach to Thai Sentence Boundary Detection and POS Tagging</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nina</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">AiTi</namePart>
<namePart type="family">Aw</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nattadaporn</namePart>
<namePart type="family">Lertcheva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuancong</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yuji</namePart>
<namePart type="family">Matsumoto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rashmi</namePart>
<namePart type="family">Prasad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>The COLING 2016 Organizing Committee</publisher>
<place>
<placeTerm type="text">Osaka, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Previous studies on Thai Sentence Boundary Detection (SBD) mostly assumed sentence ends at a space disambiguation problem, which classified space either as an indicator for Sentence Boundary (SB) or non-Sentence Boundary (nSB). In this paper, we propose a word labeling approach which treats space as a normal word, and detects SB between any two words. This removes the restriction for SB to be occurred only at space and makes our system more robust for modern Thai writing. It is because in modern Thai writing, space is not consistently used to indicate SB. As syntactic information contributes to better SBD, we further propose a joint Part-Of-Speech (POS) tagging and SBD framework based on Factorial Conditional Random Field (FCRF) model. We compare the performance of our proposed approach with reported methods on ORCHID corpus. We also performed experiments of FCRF model on the TaLAPi corpus. The results show that the word labelling approach has better performance than previous space-based classification approaches and FCRF joint model outperforms LCRF model in terms of SBD in all experiments.</abstract>
<identifier type="citekey">zhou-etal-2016-word</identifier>
<location>
<url>https://aclanthology.org/C16-1031/</url>
</location>
<part>
<date>2016-12</date>
<extent unit="page">
<start>319</start>
<end>327</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Word Labeling Approach to Thai Sentence Boundary Detection and POS Tagging
%A Zhou, Nina
%A Aw, AiTi
%A Lertcheva, Nattadaporn
%A Wang, Xuancong
%Y Matsumoto, Yuji
%Y Prasad, Rashmi
%S Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers
%D 2016
%8 December
%I The COLING 2016 Organizing Committee
%C Osaka, Japan
%F zhou-etal-2016-word
%X Previous studies on Thai Sentence Boundary Detection (SBD) mostly assumed sentence ends at a space disambiguation problem, which classified space either as an indicator for Sentence Boundary (SB) or non-Sentence Boundary (nSB). In this paper, we propose a word labeling approach which treats space as a normal word, and detects SB between any two words. This removes the restriction for SB to be occurred only at space and makes our system more robust for modern Thai writing. It is because in modern Thai writing, space is not consistently used to indicate SB. As syntactic information contributes to better SBD, we further propose a joint Part-Of-Speech (POS) tagging and SBD framework based on Factorial Conditional Random Field (FCRF) model. We compare the performance of our proposed approach with reported methods on ORCHID corpus. We also performed experiments of FCRF model on the TaLAPi corpus. The results show that the word labelling approach has better performance than previous space-based classification approaches and FCRF joint model outperforms LCRF model in terms of SBD in all experiments.
%U https://aclanthology.org/C16-1031/
%P 319-327
Markdown (Informal)
[A Word Labeling Approach to Thai Sentence Boundary Detection and POS Tagging](https://aclanthology.org/C16-1031/) (Zhou et al., COLING 2016)
ACL