@comment{RANLP 2019 conference paper. Proper nouns and acronyms are brace-protected as whole words so that styles which sentence-case titles keep their capitalisation.}
@inproceedings{walentynowicz-etal-2019-tagger,
    title = "Tagger for {Polish} Computer Mediated Communication Texts",
    author = "Walentynowicz, Wiktor and
      Piasecki, Maciej and
      Oleksy, Marcin",
    editor = "Mitkov, Ruslan and
      Angelova, Galia",
    booktitle = "Proceedings of the International Conference on Recent Advances in Natural Language Processing ({RANLP} 2019)",
    month = sep,
    year = "2019",
    address = "Varna, Bulgaria",
    publisher = "INCOMA Ltd.",
    url = "https://aclanthology.org/R19-1148",
    doi = "10.26615/978-954-452-056-4_148",
    pages = "1295--1303",
    abstract = "In this paper we present a morpho-syntactic tagger dedicated to Computer-mediated Communication texts in Polish. Its construction is based on an expanded RNN-based neural network adapted to the work on noisy texts. Among several techniques, the tagger utilises fastText embedding vectors, sequential character embedding vectors, and Brown clustering for the coarse-grained representation of sentence structures. In addition a set of manually written rules was proposed for post-processing. The system was trained to disambiguate descriptions of words in relation to Parts of Speech tags together with the full morphological information in terms of values for the different grammatical categories. We present also evaluation of several model variants on the gold standard annotated CMC data, comparison to the state-of-the-art taggers for Polish and error analysis. The proposed tagger shows significantly better results in this domain and demonstrates the viability of adaptation.",
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey walentynowicz-etal-2019-tagger: one paper (three authors)
     hosted in a conference proceedings volume (two editors). -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="walentynowicz-etal-2019-tagger">
    <!-- The paper itself -->
    <titleInfo>
      <title>Tagger for Polish Computer Mediated Communication Texts</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Wiktor</namePart>
      <namePart type="family">Walentynowicz</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Maciej</namePart>
      <namePart type="family">Piasecki</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Marcin</namePart>
      <namePart type="family">Oleksy</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- The host proceedings volume, with its editors and publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Galia</namePart>
        <namePart type="family">Angelova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd.</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this paper we present a morpho-syntactic tagger dedicated to Computer-mediated Communication texts in Polish. Its construction is based on an expanded RNN-based neural network adapted to the work on noisy texts. Among several techniques, the tagger utilises fastText embedding vectors, sequential character embedding vectors, and Brown clustering for the coarse-grained representation of sentence structures. In addition a set of manually written rules was proposed for post-processing. The system was trained to disambiguate descriptions of words in relation to Parts of Speech tags together with the full morphological information in terms of values for the different grammatical categories. We present also evaluation of several model variants on the gold standard annotated CMC data, comparison to the state-of-the-art taggers for Polish and error analysis. The proposed tagger shows significantly better results in this domain and demonstrates the viability of adaptation.</abstract>
    <!-- Identifiers and location -->
    <identifier type="citekey">walentynowicz-etal-2019-tagger</identifier>
    <identifier type="doi">10.26615/978-954-452-056-4_148</identifier>
    <location>
      <url>https://aclanthology.org/R19-1148</url>
    </location>
    <!-- Issue date and page range as recorded for this part -->
    <part>
      <date>2019-09</date>
      <extent unit="page">
        <start>1295</start>
        <end>1303</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Tagger for Polish Computer Mediated Communication Texts
%A Walentynowicz, Wiktor
%A Piasecki, Maciej
%A Oleksy, Marcin
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)
%D 2019
%8 September
%I INCOMA Ltd.
%C Varna, Bulgaria
%F walentynowicz-etal-2019-tagger
%X In this paper we present a morpho-syntactic tagger dedicated to Computer-mediated Communication texts in Polish. Its construction is based on an expanded RNN-based neural network adapted to the work on noisy texts. Among several techniques, the tagger utilises fastText embedding vectors, sequential character embedding vectors, and Brown clustering for the coarse-grained representation of sentence structures. In addition a set of manually written rules was proposed for post-processing. The system was trained to disambiguate descriptions of words in relation to Parts of Speech tags together with the full morphological information in terms of values for the different grammatical categories. We present also evaluation of several model variants on the gold standard annotated CMC data, comparison to the state-of-the-art taggers for Polish and error analysis. The proposed tagger shows significantly better results in this domain and demonstrates the viability of adaptation.
%R 10.26615/978-954-452-056-4_148
%U https://aclanthology.org/R19-1148
%U https://doi.org/10.26615/978-954-452-056-4_148
%P 1295-1303
Markdown (Informal)
[Tagger for Polish Computer Mediated Communication Texts](https://aclanthology.org/R19-1148) (Walentynowicz et al., RANLP 2019)
ACL
- Wiktor Walentynowicz, Maciej Piasecki, and Marcin Oleksy. 2019. Tagger for Polish Computer Mediated Communication Texts. In Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019), pages 1295–1303, Varna, Bulgaria. INCOMA Ltd.