@inproceedings{kural-yuret-2024-unsupervised,
title = "Unsupervised Learning of {T}urkish Morphology with Multiple Codebook {VQ}-{VAE}",
author = {Kural, M{\"u}ge and
Yuret, Deniz},
editor = {Ataman, Duygu and
Derin, Mehmet Oguz and
Ivanova, Sardana and
K{\"o}ksal, Abdullatif and
S{\"a}lev{\"a}, Jonne and
Zeyrek, Deniz},
booktitle = "Proceedings of the First Workshop on Natural Language Processing for Turkic Languages (SIGTURK 2024)",
month = aug,
year = "2024",
address = "Bangkok, Thailand and Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.sigturk-1.1",
pages = "1--17",
abstract = "This paper presents an interpretable unsupervised morphological learning model, showing comparable performance to supervised models in learning complex morphological rules of Turkish as evidenced by its application to the problem of morphological inflection within the SIGMORPHON Shared Tasks. The significance of our unsupervised approach lies in its alignment with how humans naturally acquire rules from raw data without supervision. To achieve this, we construct a model with multiple codebooks of VQ-VAE employing continuous and discrete latent variables during word generation. We evaluate the model{'}s performance under high and low-resource scenarios, and use probing techniques to examine encoded information in latent representations. We also evaluate its generalization capabilities by testing unseen suffixation scenarios within the SIGMORPHON-UniMorph 2022 Shared Task 0. Our results demonstrate our model{'}s ability to distinguish word structures into lemmas and suffixes, with each codebook specialized for different morphological features, contributing to the interpretability of our model and effectively performing morphological inflection on both seen and unseen morphological features.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kural-yuret-2024-unsupervised">
<titleInfo>
<title>Unsupervised Learning of Turkish Morphology with Multiple Codebook VQ-VAE</title>
</titleInfo>
<name type="personal">
<namePart type="given">Müge</namePart>
<namePart type="family">Kural</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Deniz</namePart>
<namePart type="family">Yuret</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Natural Language Processing for Turkic Languages (SIGTURK 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Duygu</namePart>
<namePart type="family">Ataman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mehmet</namePart>
<namePart type="given">Oguz</namePart>
<namePart type="family">Derin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sardana</namePart>
<namePart type="family">Ivanova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abdullatif</namePart>
<namePart type="family">Köksal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonne</namePart>
<namePart type="family">Sälevä</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Deniz</namePart>
<namePart type="family">Zeyrek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand and Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents an interpretable unsupervised morphological learning model, showing comparable performance to supervised models in learning complex morphological rules of Turkish as evidenced by its application to the problem of morphological inflection within the SIGMORPHON Shared Tasks. The significance of our unsupervised approach lies in its alignment with how humans naturally acquire rules from raw data without supervision. To achieve this, we construct a model with multiple codebooks of VQ-VAE employing continuous and discrete latent variables during word generation. We evaluate the model’s performance under high and low-resource scenarios, and use probing techniques to examine encoded information in latent representations. We also evaluate its generalization capabilities by testing unseen suffixation scenarios within the SIGMORPHON-UniMorph 2022 Shared Task 0. Our results demonstrate our model’s ability to distinguish word structures into lemmas and suffixes, with each codebook specialized for different morphological features, contributing to the interpretability of our model and effectively performing morphological inflection on both seen and unseen morphological features.</abstract>
<identifier type="citekey">kural-yuret-2024-unsupervised</identifier>
<location>
<url>https://aclanthology.org/2024.sigturk-1.1</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>1</start>
<end>17</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Unsupervised Learning of Turkish Morphology with Multiple Codebook VQ-VAE
%A Kural, Müge
%A Yuret, Deniz
%Y Ataman, Duygu
%Y Derin, Mehmet Oguz
%Y Ivanova, Sardana
%Y Köksal, Abdullatif
%Y Sälevä, Jonne
%Y Zeyrek, Deniz
%S Proceedings of the First Workshop on Natural Language Processing for Turkic Languages (SIGTURK 2024)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand and Online
%F kural-yuret-2024-unsupervised
%X This paper presents an interpretable unsupervised morphological learning model, showing comparable performance to supervised models in learning complex morphological rules of Turkish as evidenced by its application to the problem of morphological inflection within the SIGMORPHON Shared Tasks. The significance of our unsupervised approach lies in its alignment with how humans naturally acquire rules from raw data without supervision. To achieve this, we construct a model with multiple codebooks of VQ-VAE employing continuous and discrete latent variables during word generation. We evaluate the model’s performance under high and low-resource scenarios, and use probing techniques to examine encoded information in latent representations. We also evaluate its generalization capabilities by testing unseen suffixation scenarios within the SIGMORPHON-UniMorph 2022 Shared Task 0. Our results demonstrate our model’s ability to distinguish word structures into lemmas and suffixes, with each codebook specialized for different morphological features, contributing to the interpretability of our model and effectively performing morphological inflection on both seen and unseen morphological features.
%U https://aclanthology.org/2024.sigturk-1.1
%P 1-17
Markdown (Informal)
[Unsupervised Learning of Turkish Morphology with Multiple Codebook VQ-VAE](https://aclanthology.org/2024.sigturk-1.1) (Kural & Yuret, SIGTURK-WS 2024)
ACL