@inproceedings{nitschke-2021-restoring,
title = "Restoring the Sister: Reconstructing a Lexicon from Sister Languages using Neural Machine Translation",
author = "Nitschke, Remo",
editor = "Mager, Manuel and
Oncevay, Arturo and
Rios, Annette and
Ruiz, Ivan Vladimir Meza and
Palmer, Alexis and
Neubig, Graham and
Kann, Katharina",
booktitle = "Proceedings of the First Workshop on Natural Language Processing for Indigenous Languages of the Americas",
month = jun,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.americasnlp-1.13",
doi = "10.18653/v1/2021.americasnlp-1.13",
pages = "122--130",
abstract = "The historical comparative method has a long history in historical linguistics. It describes a process by which historical linguists aim to reverse-engineer the historical developments of language families in order to reconstruct proto-forms and familial relations between languages. In recent years, there have been multiple attempts to replicate this process through machine learning, especially in the realm of cognate detection (List et al., 2016; Ciobanu and Dinu, 2014; Rama et al., 2018). So far, most of these experiments aimed at actual reconstruction have attempted the prediction of a proto-form from the forms of the daughter languages (Ciobanu and Dinu, 2018; Meloni et al., 2019). Here, we propose a reimplementation that uses modern related languages, or sisters, instead, to reconstruct the vocabulary of a target language. In particular, we show that we can reconstruct vocabulary of a target language by using a fairly small data set of parallel cognates from different sister languages, using a neural machine translation (NMT) architecture with a standard encoder-decoder setup. This effort is directly in furtherance of the goal to use machine learning tools to help under-served language communities in their efforts at reclaiming, preserving, or reconstructing their own languages.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nitschke-2021-restoring">
<titleInfo>
<title>Restoring the Sister: Reconstructing a Lexicon from Sister Languages using Neural Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Remo</namePart>
<namePart type="family">Nitschke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Natural Language Processing for Indigenous Languages of the Americas</title>
</titleInfo>
<name type="personal">
<namePart type="given">Manuel</namePart>
<namePart type="family">Mager</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arturo</namePart>
<namePart type="family">Oncevay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Annette</namePart>
<namePart type="family">Rios</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="given">Vladimir</namePart>
<namePart type="given">Meza</namePart>
<namePart type="family">Ruiz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexis</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Graham</namePart>
<namePart type="family">Neubig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katharina</namePart>
<namePart type="family">Kann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The historical comparative method has a long history in historical linguistics. It describes a process by which historical linguists aim to reverse-engineer the historical developments of language families in order to reconstruct proto-forms and familial relations between languages. In recent years, there have been multiple attempts to replicate this process through machine learning, especially in the realm of cognate detection (List et al., 2016; Ciobanu and Dinu, 2014; Rama et al., 2018). So far, most of these experiments aimed at actual reconstruction have attempted the prediction of a proto-form from the forms of the daughter languages (Ciobanu and Dinu, 2018; Meloni et al., 2019). Here, we propose a reimplementation that uses modern related languages, or sisters, instead, to reconstruct the vocabulary of a target language. In particular, we show that we can reconstruct vocabulary of a target language by using a fairly small data set of parallel cognates from different sister languages, using a neural machine translation (NMT) architecture with a standard encoder-decoder setup. This effort is directly in furtherance of the goal to use machine learning tools to help under-served language communities in their efforts at reclaiming, preserving, or reconstructing their own languages.</abstract>
<identifier type="citekey">nitschke-2021-restoring</identifier>
<identifier type="doi">10.18653/v1/2021.americasnlp-1.13</identifier>
<location>
<url>https://aclanthology.org/2021.americasnlp-1.13</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>122</start>
<end>130</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Restoring the Sister: Reconstructing a Lexicon from Sister Languages using Neural Machine Translation
%A Nitschke, Remo
%Y Mager, Manuel
%Y Oncevay, Arturo
%Y Rios, Annette
%Y Ruiz, Ivan Vladimir Meza
%Y Palmer, Alexis
%Y Neubig, Graham
%Y Kann, Katharina
%S Proceedings of the First Workshop on Natural Language Processing for Indigenous Languages of the Americas
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F nitschke-2021-restoring
%X The historical comparative method has a long history in historical linguistics. It describes a process by which historical linguists aim to reverse-engineer the historical developments of language families in order to reconstruct proto-forms and familial relations between languages. In recent years, there have been multiple attempts to replicate this process through machine learning, especially in the realm of cognate detection (List et al., 2016; Ciobanu and Dinu, 2014; Rama et al., 2018). So far, most of these experiments aimed at actual reconstruction have attempted the prediction of a proto-form from the forms of the daughter languages (Ciobanu and Dinu, 2018; Meloni et al., 2019). Here, we propose a reimplementation that uses modern related languages, or sisters, instead, to reconstruct the vocabulary of a target language. In particular, we show that we can reconstruct vocabulary of a target language by using a fairly small data set of parallel cognates from different sister languages, using a neural machine translation (NMT) architecture with a standard encoder-decoder setup. This effort is directly in furtherance of the goal to use machine learning tools to help under-served language communities in their efforts at reclaiming, preserving, or reconstructing their own languages.
%R 10.18653/v1/2021.americasnlp-1.13
%U https://aclanthology.org/2021.americasnlp-1.13
%U https://doi.org/10.18653/v1/2021.americasnlp-1.13
%P 122-130
Markdown (Informal)
[Restoring the Sister: Reconstructing a Lexicon from Sister Languages using Neural Machine Translation](https://aclanthology.org/2021.americasnlp-1.13) (Nitschke, AmericasNLP 2021)
ACL