@comment{Conference paper in the LREC-COLING 2024 proceedings; the url field points to its ACL Anthology page.}
@inproceedings{svoboda-sevcikova-2024-parent,
    title     = {{PaReNT} ({Parent Retrieval Neural Tool}): A Deep Dive into Word Formation across Languages},
    author    = {Svoboda, Emil and
                 Sevcikova, Magda},
    editor    = {Calzolari, Nicoletta and
                 Kan, Min-Yen and
                 Hoste, Veronique and
                 Lenci, Alessandro and
                 Sakti, Sakriani and
                 Xue, Nianwen},
    booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
    month     = may,
    year      = {2024},
    address   = {Torino, Italia},
    publisher = {ELRA and ICCL},
    url       = {https://aclanthology.org/2024.lrec-main.1104},
    pages     = {12611--12621},
    abstract  = {We present PaReNT (Parent Retrieval Neural Tool), a deep-learning-based multilingual tool performing parent retrieval and word formation classification in English, German, Dutch, Spanish, French, Russian, and Czech. Parent retrieval refers to determining the lexeme or lexemes the input lexeme was based on (e.g. {``}darkness{''} is traced back to {``}dark{''}; {``}waterfall{''} decomposes into {``}water{''} and {``}fall{''}). Additionally, PaReNT performs word formation classification, which determines the input lexeme as a compound (e.g. {``}proofread{''}), a derivative (e.g. {``}deescalate{''}) or as an unmotivated word (e.g. {``}dog{''}). These seven languages are selected from three major branches of the Indo-European language family (Germanic, Romance, Slavic). Data is aggregated from a range of word-formation resources, as well as Wiktionary, to train and test the tool. The tool is based on a custom-architecture hybrid transformer block-enriched sequence-to-sequence neural network utilizing both a character-based and semantic representation of the input lexemes, with two output modules - one decoder-based dedicated to parent retrieval, and one classifier-based for word formation classification. PaReNT achieves a mean accuracy of 0.62 in parent retrieval and a mean balanced accuracy of 0.74 in word formation classification.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="svoboda-sevcikova-2024-parent">
<titleInfo>
<title>PaReNT (Parent Retrieval Neural Tool): A Deep Dive into Word Formation across Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Emil</namePart>
<namePart type="family">Svoboda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Magda</namePart>
<namePart type="family">Sevcikova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present PaReNT (Parent Retrieval Neural Tool), a deep-learning-based multilingual tool performing parent retrieval and word formation classification in English, German, Dutch, Spanish, French, Russian, and Czech. Parent retrieval refers to determining the lexeme or lexemes the input lexeme was based on (e.g. “darkness” is traced back to “dark”; “waterfall” decomposes into “water” and “fall”). Additionally, PaReNT performs word formation classification, which determines the input lexeme as a compound (e.g. “proofread”), a derivative (e.g. “deescalate”) or as an unmotivated word (e.g. “dog”). These seven languages are selected from three major branches of the Indo-European language family (Germanic, Romance, Slavic). Data is aggregated from a range of word-formation resources, as well as Wiktionary, to train and test the tool. The tool is based on a custom-architecture hybrid transformer block-enriched sequence-to-sequence neural network utilizing both a character-based and semantic representation of the input lexemes, with two output modules - one decoder-based dedicated to parent retrieval, and one classifier-based for word formation classification. PaReNT achieves a mean accuracy of 0.62 in parent retrieval and a mean balanced accuracy of 0.74 in word formation classification.</abstract>
<identifier type="citekey">svoboda-sevcikova-2024-parent</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.1104</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>12611</start>
<end>12621</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T PaReNT (Parent Retrieval Neural Tool): A Deep Dive into Word Formation across Languages
%A Svoboda, Emil
%A Sevcikova, Magda
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F svoboda-sevcikova-2024-parent
%X We present PaReNT (Parent Retrieval Neural Tool), a deep-learning-based multilingual tool performing parent retrieval and word formation classification in English, German, Dutch, Spanish, French, Russian, and Czech. Parent retrieval refers to determining the lexeme or lexemes the input lexeme was based on (e.g. “darkness” is traced back to “dark”; “waterfall” decomposes into “water” and “fall”). Additionally, PaReNT performs word formation classification, which determines the input lexeme as a compound (e.g. “proofread”), a derivative (e.g. “deescalate”) or as an unmotivated word (e.g. “dog”). These seven languages are selected from three major branches of the Indo-European language family (Germanic, Romance, Slavic). Data is aggregated from a range of word-formation resources, as well as Wiktionary, to train and test the tool. The tool is based on a custom-architecture hybrid transformer block-enriched sequence-to-sequence neural network utilizing both a character-based and semantic representation of the input lexemes, with two output modules - one decoder-based dedicated to parent retrieval, and one classifier-based for word formation classification. PaReNT achieves a mean accuracy of 0.62 in parent retrieval and a mean balanced accuracy of 0.74 in word formation classification.
%U https://aclanthology.org/2024.lrec-main.1104
%P 12611-12621
Markdown (Informal)
[PaReNT (Parent Retrieval Neural Tool): A Deep Dive into Word Formation across Languages](https://aclanthology.org/2024.lrec-main.1104) (Svoboda & Sevcikova, LREC-COLING 2024)
ACL