@inproceedings{limisiewicz-2023-ufal,
title = "{{\'U}FAL} Submission for {SIGTYP} Supervised Cognate Detection Task",
author = "Limisiewicz, Tomasz",
editor = "Beinborn, Lisa and
Goswami, Koustava and
Murado{\u{g}}lu, Saliha and
Sorokin, Alexey and
Kumar, Ritesh and
Shcherbakov, Andreas and
Ponti, Edoardo M. and
Cotterell, Ryan and
Vylomova, Ekaterina",
booktitle = "Proceedings of the 5th Workshop on Research in Computational Linguistic Typology and Multilingual NLP",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.sigtyp-1.14",
doi = "10.18653/v1/2023.sigtyp-1.14",
pages = "132--136",
abstract = "In this work, I present {\'U}FAL submission for the supervised task of detecting cognates and derivatives. Cognates are word pairs in different languages sharing the origin in earlier attested forms in ancestral language, while derivatives come directly from another language. For the task, I developed gradient boosted tree classifier trained on linguistic and statistical features. The solution came first from two delivered systems with an 87{\%} F1 score on the test split. This write-up gives an insight into the system and shows the importance of using linguistic features and character-level statistics for the task.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="limisiewicz-2023-ufal">
<titleInfo>
<title>ÚFAL Submission for SIGTYP Supervised Cognate Detection Task</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tomasz</namePart>
<namePart type="family">Limisiewicz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 5th Workshop on Research in Computational Linguistic Typology and Multilingual NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lisa</namePart>
<namePart type="family">Beinborn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Koustava</namePart>
<namePart type="family">Goswami</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saliha</namePart>
<namePart type="family">Muradoğlu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexey</namePart>
<namePart type="family">Sorokin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ritesh</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Shcherbakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Edoardo</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Ponti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Cotterell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Vylomova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this work, I present ÚFAL submission for the supervised task of detecting cognates and derivatives. Cognates are word pairs in different languages sharing the origin in earlier attested forms in ancestral language, while derivatives come directly from another language. For the task, I developed gradient boosted tree classifier trained on linguistic and statistical features. The solution came first from two delivered systems with an 87% F1 score on the test split. This write-up gives an insight into the system and shows the importance of using linguistic features and character-level statistics for the task.</abstract>
<identifier type="citekey">limisiewicz-2023-ufal</identifier>
<identifier type="doi">10.18653/v1/2023.sigtyp-1.14</identifier>
<location>
<url>https://aclanthology.org/2023.sigtyp-1.14</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>132</start>
<end>136</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ÚFAL Submission for SIGTYP Supervised Cognate Detection Task
%A Limisiewicz, Tomasz
%Y Beinborn, Lisa
%Y Goswami, Koustava
%Y Muradoğlu, Saliha
%Y Sorokin, Alexey
%Y Kumar, Ritesh
%Y Shcherbakov, Andreas
%Y Ponti, Edoardo M.
%Y Cotterell, Ryan
%Y Vylomova, Ekaterina
%S Proceedings of the 5th Workshop on Research in Computational Linguistic Typology and Multilingual NLP
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F limisiewicz-2023-ufal
%X In this work, I present ÚFAL submission for the supervised task of detecting cognates and derivatives. Cognates are word pairs in different languages sharing the origin in earlier attested forms in ancestral language, while derivatives come directly from another language. For the task, I developed gradient boosted tree classifier trained on linguistic and statistical features. The solution came first from two delivered systems with an 87% F1 score on the test split. This write-up gives an insight into the system and shows the importance of using linguistic features and character-level statistics for the task.
%R 10.18653/v1/2023.sigtyp-1.14
%U https://aclanthology.org/2023.sigtyp-1.14
%U https://doi.org/10.18653/v1/2023.sigtyp-1.14
%P 132-136
Markdown (Informal)
[ÚFAL Submission for SIGTYP Supervised Cognate Detection Task](https://aclanthology.org/2023.sigtyp-1.14) (Limisiewicz, SIGTYP 2023)
ACL