@inproceedings{macsween-caines-2020-expectation,
title = "An Expectation Maximisation Algorithm for Automated Cognate Detection",
author = "MacSween, Roddy and
Caines, Andrew",
editor = "Fern{\'a}ndez, Raquel and
Linzen, Tal",
booktitle = "Proceedings of the 24th Conference on Computational Natural Language Learning",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.conll-1.38",
doi = "10.18653/v1/2020.conll-1.38",
pages = "476--485",
abstract = "In historical linguistics, cognate detection is the task of determining whether sets of words have common etymological roots. Inspired by the comparative method used by human linguists, we develop a system for automated cognate detection that frames the task as an inference problem for a general statistical model consisting of observed data (potentially cognate pairs of words), latent variables (the cognacy status of pairs) and unknown global parameters (which sounds correspond between languages). We then give a specific instance of such a model along with an expectation-maximisation algorithm to infer its parameters. We evaluate our system on a corpus of 8140 cognate sets, finding the performance of our method to be comparable to the state of the art. We additionally carry out qualitative analysis demonstrating advantages it has over existing systems. We also suggest several ways our work could be extended within the general theoretical framework we propose.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="macsween-caines-2020-expectation">
<titleInfo>
<title>An Expectation Maximisation Algorithm for Automated Cognate Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Roddy</namePart>
<namePart type="family">MacSween</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="family">Caines</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 24th Conference on Computational Natural Language Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Raquel</namePart>
<namePart type="family">Fernández</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tal</namePart>
<namePart type="family">Linzen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In historical linguistics, cognate detection is the task of determining whether sets of words have common etymological roots. Inspired by the comparative method used by human linguists, we develop a system for automated cognate detection that frames the task as an inference problem for a general statistical model consisting of observed data (potentially cognate pairs of words), latent variables (the cognacy status of pairs) and unknown global parameters (which sounds correspond between languages). We then give a specific instance of such a model along with an expectation-maximisation algorithm to infer its parameters. We evaluate our system on a corpus of 8140 cognate sets, finding the performance of our method to be comparable to the state of the art. We additionally carry out qualitative analysis demonstrating advantages it has over existing systems. We also suggest several ways our work could be extended within the general theoretical framework we propose.</abstract>
<identifier type="citekey">macsween-caines-2020-expectation</identifier>
<identifier type="doi">10.18653/v1/2020.conll-1.38</identifier>
<location>
<url>https://aclanthology.org/2020.conll-1.38</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>476</start>
<end>485</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T An Expectation Maximisation Algorithm for Automated Cognate Detection
%A MacSween, Roddy
%A Caines, Andrew
%Y Fernández, Raquel
%Y Linzen, Tal
%S Proceedings of the 24th Conference on Computational Natural Language Learning
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F macsween-caines-2020-expectation
%X In historical linguistics, cognate detection is the task of determining whether sets of words have common etymological roots. Inspired by the comparative method used by human linguists, we develop a system for automated cognate detection that frames the task as an inference problem for a general statistical model consisting of observed data (potentially cognate pairs of words), latent variables (the cognacy status of pairs) and unknown global parameters (which sounds correspond between languages). We then give a specific instance of such a model along with an expectation-maximisation algorithm to infer its parameters. We evaluate our system on a corpus of 8140 cognate sets, finding the performance of our method to be comparable to the state of the art. We additionally carry out qualitative analysis demonstrating advantages it has over existing systems. We also suggest several ways our work could be extended within the general theoretical framework we propose.
%R 10.18653/v1/2020.conll-1.38
%U https://aclanthology.org/2020.conll-1.38
%U https://doi.org/10.18653/v1/2020.conll-1.38
%P 476-485
Markdown (Informal)
[An Expectation Maximisation Algorithm for Automated Cognate Detection](https://aclanthology.org/2020.conll-1.38) (MacSween & Caines, CoNLL 2020)
ACL