@inproceedings{stoeckel-etal-2020-voting,
title = "Voting for {POS} tagging of {L}atin texts: Using the flair of {FLAIR} to better Ensemble Classifiers by Example of {L}atin",
author = "Stoeckel, Manuel and
Henlein, Alexander and
Hemati, Wahed and
Mehler, Alexander",
editor = "Sprugnoli, Rachele and
Passarotti, Marco",
booktitle = "Proceedings of LT4HALA 2020 - 1st Workshop on Language Technologies for Historical and Ancient Languages",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/2020.lt4hala-1.21",
pages = "130--135",
abstract = "Despite the great importance of the Latin language in the past, there are relatively few resources available today to develop modern NLP tools for this language. Therefore, the EvaLatin Shared Task for Lemmatization and Part-of-Speech (POS) tagging was published in the LT4HALA workshop. In our work, we dealt with the second EvaLatin task, that is, POS tagging. Since most of the available Latin word embeddings were trained on either few or inaccurate data, we trained several embeddings on better data in the first step. Based on these embeddings, we trained several state-of-the-art taggers and used them as input for an ensemble classifier called LSTMVoter. We were able to achieve the best results for both the cross-genre and the cross-time task (90.64{\%} and 87.00{\%}) without using additional annotated data (closed modality). In the meantime, we further improved the system and achieved even better results (96.91{\%} on classical, 90.87{\%} on cross-genre and 87.35{\%} on cross-time).",
language = "English",
ISBN = "979-10-95546-53-5",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="stoeckel-etal-2020-voting">
<titleInfo>
<title>Voting for POS tagging of Latin texts: Using the flair of FLAIR to better Ensemble Classifiers by Example of Latin</title>
</titleInfo>
<name type="personal">
<namePart type="given">Manuel</namePart>
<namePart type="family">Stoeckel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Henlein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wahed</namePart>
<namePart type="family">Hemati</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Mehler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of LT4HALA 2020 - 1st Workshop on Language Technologies for Historical and Ancient Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rachele</namePart>
<namePart type="family">Sprugnoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Passarotti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-53-5</identifier>
</relatedItem>
<abstract>Despite the great importance of the Latin language in the past, there are relatively few resources available today to develop modern NLP tools for this language. Therefore, the EvaLatin Shared Task for Lemmatization and Part-of-Speech (POS) tagging was published in the LT4HALA workshop. In our work, we dealt with the second EvaLatin task, that is, POS tagging. Since most of the available Latin word embeddings were trained on either few or inaccurate data, we trained several embeddings on better data in the first step. Based on these embeddings, we trained several state-of-the-art taggers and used them as input for an ensemble classifier called LSTMVoter. We were able to achieve the best results for both the cross-genre and the cross-time task (90.64% and 87.00%) without using additional annotated data (closed modality). In the meantime, we further improved the system and achieved even better results (96.91% on classical, 90.87% on cross-genre and 87.35% on cross-time).</abstract>
<identifier type="citekey">stoeckel-etal-2020-voting</identifier>
<location>
<url>https://aclanthology.org/2020.lt4hala-1.21</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>130</start>
<end>135</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Voting for POS tagging of Latin texts: Using the flair of FLAIR to better Ensemble Classifiers by Example of Latin
%A Stoeckel, Manuel
%A Henlein, Alexander
%A Hemati, Wahed
%A Mehler, Alexander
%Y Sprugnoli, Rachele
%Y Passarotti, Marco
%S Proceedings of LT4HALA 2020 - 1st Workshop on Language Technologies for Historical and Ancient Languages
%D 2020
%8 May
%I European Language Resources Association (ELRA)
%C Marseille, France
%@ 979-10-95546-53-5
%G English
%F stoeckel-etal-2020-voting
%X Despite the great importance of the Latin language in the past, there are relatively few resources available today to develop modern NLP tools for this language. Therefore, the EvaLatin Shared Task for Lemmatization and Part-of-Speech (POS) tagging was published in the LT4HALA workshop. In our work, we dealt with the second EvaLatin task, that is, POS tagging. Since most of the available Latin word embeddings were trained on either few or inaccurate data, we trained several embeddings on better data in the first step. Based on these embeddings, we trained several state-of-the-art taggers and used them as input for an ensemble classifier called LSTMVoter. We were able to achieve the best results for both the cross-genre and the cross-time task (90.64% and 87.00%) without using additional annotated data (closed modality). In the meantime, we further improved the system and achieved even better results (96.91% on classical, 90.87% on cross-genre and 87.35% on cross-time).
%U https://aclanthology.org/2020.lt4hala-1.21
%P 130-135
Markdown (Informal)
[Voting for POS tagging of Latin texts: Using the flair of FLAIR to better Ensemble Classifiers by Example of Latin](https://aclanthology.org/2020.lt4hala-1.21) (Stoeckel et al., LT4HALA 2020)
ACL