@inproceedings{inoue-etal-2017-joint,
title = "Joint Prediction of Morphosyntactic Categories for Fine-Grained {A}rabic Part-of-Speech Tagging Exploiting Tag Dictionary Information",
author = "Inoue, Go and
Shindo, Hiroyuki and
Matsumoto, Yuji",
editor = "Levy, Roger and
Specia, Lucia",
booktitle = "Proceedings of the 21st Conference on Computational Natural Language Learning ({C}o{NLL} 2017)",
month = aug,
year = "2017",
address = "Vancouver, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/K17-1042/",
doi = "10.18653/v1/K17-1042",
pages = "421--431",
abstract = "Part-of-speech (POS) tagging for morphologically rich languages such as Arabic is a challenging problem because of their enormous tag sets. One reason for this is that in the tagging scheme for such languages, a complete POS tag is formed by combining tags from multiple tag sets defined for each morphosyntactic category. Previous approaches in Arabic POS tagging applied one model for each morphosyntactic tagging task, without utilizing shared information between the tasks. In this paper, we propose an approach that utilizes this information by jointly modeling multiple morphosyntactic tagging tasks with a multi-task learning framework. We also propose a method of incorporating tag dictionary information into our neural models by combining word representations with representations of the sets of possible tags. Our experiments showed that the joint model with tag dictionary information results in an accuracy of 91.38{\%} on the Penn Arabic Treebank data set, with an absolute improvement of 2.11{\%} over the current state-of-the-art tagger."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="inoue-etal-2017-joint">
<titleInfo>
<title>Joint Prediction of Morphosyntactic Categories for Fine-Grained Arabic Part-of-Speech Tagging Exploiting Tag Dictionary Information</title>
</titleInfo>
<name type="personal">
<namePart type="given">Go</namePart>
<namePart type="family">Inoue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hiroyuki</namePart>
<namePart type="family">Shindo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuji</namePart>
<namePart type="family">Matsumoto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 21st Conference on Computational Natural Language Learning (CoNLL 2017)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Roger</namePart>
<namePart type="family">Levy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucia</namePart>
<namePart type="family">Specia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vancouver, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Part-of-speech (POS) tagging for morphologically rich languages such as Arabic is a challenging problem because of their enormous tag sets. One reason for this is that in the tagging scheme for such languages, a complete POS tag is formed by combining tags from multiple tag sets defined for each morphosyntactic category. Previous approaches in Arabic POS tagging applied one model for each morphosyntactic tagging task, without utilizing shared information between the tasks. In this paper, we propose an approach that utilizes this information by jointly modeling multiple morphosyntactic tagging tasks with a multi-task learning framework. We also propose a method of incorporating tag dictionary information into our neural models by combining word representations with representations of the sets of possible tags. Our experiments showed that the joint model with tag dictionary information results in an accuracy of 91.38% on the Penn Arabic Treebank data set, with an absolute improvement of 2.11% over the current state-of-the-art tagger.</abstract>
<identifier type="citekey">inoue-etal-2017-joint</identifier>
<identifier type="doi">10.18653/v1/K17-1042</identifier>
<location>
<url>https://aclanthology.org/K17-1042/</url>
</location>
<part>
<date>2017-08</date>
<extent unit="page">
<start>421</start>
<end>431</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Joint Prediction of Morphosyntactic Categories for Fine-Grained Arabic Part-of-Speech Tagging Exploiting Tag Dictionary Information
%A Inoue, Go
%A Shindo, Hiroyuki
%A Matsumoto, Yuji
%Y Levy, Roger
%Y Specia, Lucia
%S Proceedings of the 21st Conference on Computational Natural Language Learning (CoNLL 2017)
%D 2017
%8 August
%I Association for Computational Linguistics
%C Vancouver, Canada
%F inoue-etal-2017-joint
%X Part-of-speech (POS) tagging for morphologically rich languages such as Arabic is a challenging problem because of their enormous tag sets. One reason for this is that in the tagging scheme for such languages, a complete POS tag is formed by combining tags from multiple tag sets defined for each morphosyntactic category. Previous approaches in Arabic POS tagging applied one model for each morphosyntactic tagging task, without utilizing shared information between the tasks. In this paper, we propose an approach that utilizes this information by jointly modeling multiple morphosyntactic tagging tasks with a multi-task learning framework. We also propose a method of incorporating tag dictionary information into our neural models by combining word representations with representations of the sets of possible tags. Our experiments showed that the joint model with tag dictionary information results in an accuracy of 91.38% on the Penn Arabic Treebank data set, with an absolute improvement of 2.11% over the current state-of-the-art tagger.
%R 10.18653/v1/K17-1042
%U https://aclanthology.org/K17-1042/
%U https://doi.org/10.18653/v1/K17-1042
%P 421-431
Markdown (Informal)
[Joint Prediction of Morphosyntactic Categories for Fine-Grained Arabic Part-of-Speech Tagging Exploiting Tag Dictionary Information](https://aclanthology.org/K17-1042/) (Inoue et al., CoNLL 2017)
ACL