@inproceedings{dellert-2018-combining,
title = "Combining Information-Weighted Sequence Alignment and Sound Correspondence Models for Improved Cognate Detection",
author = "Dellert, Johannes",
editor = "Bender, Emily M. and
Derczynski, Leon and
Isabelle, Pierre",
booktitle = "Proceedings of the 27th International Conference on Computational Linguistics",
month = aug,
year = "2018",
address = "Santa Fe, New Mexico, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/C18-1264",
pages = "3123--3133",
abstract = "Methods for automated cognate detection in historical linguistics invariably build on some measure of form similarity which is designed to capture the remaining systematic similarities between cognate word forms after thousands of years of divergence. A wide range of clustering and classification algorithms has been explored for the purpose, whereas possible improvements on the level of pairwise form similarity measures have not been the main focus of research. The approach presented in this paper improves on this core component of cognate detection systems by a novel combination of information weighting, a technique for putting less weight on reoccurring morphological material, with sound correspondence modeling by means of pointwise mutual information. In evaluations on expert cognacy judgments over a subset of the IPA-encoded NorthEuraLex database, the combination of both techniques is shown to lead to considerable improvements in average precision for binary cognate detection, and modest improvements for distance-based cognate clustering.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dellert-2018-combining">
<titleInfo>
<title>Combining Information-Weighted Sequence Alignment and Sound Correspondence Models for Improved Cognate Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Johannes</namePart>
<namePart type="family">Dellert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 27th International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Emily</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Bender</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leon</namePart>
<namePart type="family">Derczynski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pierre</namePart>
<namePart type="family">Isabelle</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Santa Fe, New Mexico, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Methods for automated cognate detection in historical linguistics invariably build on some measure of form similarity which is designed to capture the remaining systematic similarities between cognate word forms after thousands of years of divergence. A wide range of clustering and classification algorithms has been explored for the purpose, whereas possible improvements on the level of pairwise form similarity measures have not been the main focus of research. The approach presented in this paper improves on this core component of cognate detection systems by a novel combination of information weighting, a technique for putting less weight on reoccurring morphological material, with sound correspondence modeling by means of pointwise mutual information. In evaluations on expert cognacy judgments over a subset of the IPA-encoded NorthEuraLex database, the combination of both techniques is shown to lead to considerable improvements in average precision for binary cognate detection, and modest improvements for distance-based cognate clustering.</abstract>
<identifier type="citekey">dellert-2018-combining</identifier>
<location>
<url>https://aclanthology.org/C18-1264</url>
</location>
<part>
<date>2018-08</date>
<extent unit="page">
<start>3123</start>
<end>3133</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Combining Information-Weighted Sequence Alignment and Sound Correspondence Models for Improved Cognate Detection
%A Dellert, Johannes
%Y Bender, Emily M.
%Y Derczynski, Leon
%Y Isabelle, Pierre
%S Proceedings of the 27th International Conference on Computational Linguistics
%D 2018
%8 August
%I Association for Computational Linguistics
%C Santa Fe, New Mexico, USA
%F dellert-2018-combining
%X Methods for automated cognate detection in historical linguistics invariably build on some measure of form similarity which is designed to capture the remaining systematic similarities between cognate word forms after thousands of years of divergence. A wide range of clustering and classification algorithms has been explored for the purpose, whereas possible improvements on the level of pairwise form similarity measures have not been the main focus of research. The approach presented in this paper improves on this core component of cognate detection systems by a novel combination of information weighting, a technique for putting less weight on reoccurring morphological material, with sound correspondence modeling by means of pointwise mutual information. In evaluations on expert cognacy judgments over a subset of the IPA-encoded NorthEuraLex database, the combination of both techniques is shown to lead to considerable improvements in average precision for binary cognate detection, and modest improvements for distance-based cognate clustering.
%U https://aclanthology.org/C18-1264
%P 3123-3133
Markdown (Informal)
[Combining Information-Weighted Sequence Alignment and Sound Correspondence Models for Improved Cognate Detection](https://aclanthology.org/C18-1264) (Dellert, COLING 2018)
ACL