% Conference-paper entry for ACL Anthology item 2023.lchange-1.7.
% The product name in the title is brace-protected as a whole word so that
% sentence-casing bibliography styles keep its internal capitals.
@inproceedings{dehouck-etal-2023-evosem-database,
    title = "{EvoSem}: A database of polysemous cognate sets",
    author = "Dehouck, Mathieu and
      Fran{\c{c}}ois, Alex and
      Kalyan, Siva and
      Pastor, Martial and
      Kletz, David",
    editor = "Tahmasebi, Nina and
      Montariol, Syrielle and
      Dubossarsky, Haim and
      Kutuzov, Andrey and
      Hengchen, Simon and
      Alfter, David and
      Periti, Francesco and
      Cassotti, Pierluigi",
    booktitle = "Proceedings of the 4th Workshop on Computational Approaches to Historical Language Change",
    month = dec,
    year = "2023",
    address = "Singapore",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2023.lchange-1.7",
    doi = "10.18653/v1/2023.lchange-1.7",
    pages = "66--75",
    abstract = "Polysemies, or {``}colexifications{''}, are of great interest in cognitive and historical linguistics, since meanings that are frequently expressed by the same lexeme are likely to be conceptually similar, and lie along a common pathway of semantic change. We argue that these types of inferences can be more reliably drawn from polysemies of cognate sets (which we call {``}dialexifications{''}) than from polysemies of lexemes. After giving a precise definition of dialexification, we introduce Evosem, a cross-linguistic database of etymologies scraped from several online sources. Based on this database, we measure for each pair of senses how many cognate sets include them both {---} i.e. how often this pair of senses is {``}dialexified{''}. This allows us to construct a weighted dialexification graph for any set of senses, indicating the conceptual and historical closeness of each pair. We also present an online interface for browsing our database, including graphs and interactive tables. We then discuss potential applications to NLP tasks and to linguistic research.",
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, keyed by the citekey below. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="dehouck-etal-2023-evosem-database">
    <!-- Title of the paper itself -->
    <titleInfo>
      <title>EvoSem: A database of polysemous cognate sets</title>
    </titleInfo>
    <!-- Paper authors, in listed order -->
    <name type="personal">
      <namePart type="given">Mathieu</namePart>
      <namePart type="family">Dehouck</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alex</namePart>
      <namePart type="family">François</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Siva</namePart>
      <namePart type="family">Kalyan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Martial</namePart>
      <namePart type="family">Pastor</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">David</namePart>
      <namePart type="family">Kletz</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 4th Workshop on Computational Approaches to Historical Language Change</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nina</namePart>
        <namePart type="family">Tahmasebi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Syrielle</namePart>
        <namePart type="family">Montariol</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Haim</namePart>
        <namePart type="family">Dubossarsky</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Andrey</namePart>
        <namePart type="family">Kutuzov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Simon</namePart>
        <namePart type="family">Hengchen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Alfter</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Francesco</namePart>
        <namePart type="family">Periti</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Pierluigi</namePart>
        <namePart type="family">Cassotti</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Singapore</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Polysemies, or “colexifications”, are of great interest in cognitive and historical linguistics, since meanings that are frequently expressed by the same lexeme are likely to be conceptually similar, and lie along a common pathway of semantic change. We argue that these types of inferences can be more reliably drawn from polysemies of cognate sets (which we call “dialexifications”) than from polysemies of lexemes. After giving a precise definition of dialexification, we introduce Evosem, a cross-linguistic database of etymologies scraped from several online sources. Based on this database, we measure for each pair of senses how many cognate sets include them both — i.e. how often this pair of senses is “dialexified”. This allows us to construct a weighted dialexification graph for any set of senses, indicating the conceptual and historical closeness of each pair. We also present an online interface for browsing our database, including graphs and interactive tables. We then discuss potential applications to NLP tasks and to linguistic research.</abstract>
    <!-- Identifiers and access location -->
    <identifier type="citekey">dehouck-etal-2023-evosem-database</identifier>
    <identifier type="doi">10.18653/v1/2023.lchange-1.7</identifier>
    <location>
      <url>https://aclanthology.org/2023.lchange-1.7</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2023-12</date>
      <extent unit="page">
        <start>66</start>
        <end>75</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T EvoSem: A database of polysemous cognate sets
%A Dehouck, Mathieu
%A François, Alex
%A Kalyan, Siva
%A Pastor, Martial
%A Kletz, David
%Y Tahmasebi, Nina
%Y Montariol, Syrielle
%Y Dubossarsky, Haim
%Y Kutuzov, Andrey
%Y Hengchen, Simon
%Y Alfter, David
%Y Periti, Francesco
%Y Cassotti, Pierluigi
%S Proceedings of the 4th Workshop on Computational Approaches to Historical Language Change
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F dehouck-etal-2023-evosem-database
%X Polysemies, or “colexifications”, are of great interest in cognitive and historical linguistics, since meanings that are frequently expressed by the same lexeme are likely to be conceptually similar, and lie along a common pathway of semantic change. We argue that these types of inferences can be more reliably drawn from polysemies of cognate sets (which we call “dialexifications”) than from polysemies of lexemes. After giving a precise definition of dialexification, we introduce Evosem, a cross-linguistic database of etymologies scraped from several online sources. Based on this database, we measure for each pair of senses how many cognate sets include them both — i.e. how often this pair of senses is “dialexified”. This allows us to construct a weighted dialexification graph for any set of senses, indicating the conceptual and historical closeness of each pair. We also present an online interface for browsing our database, including graphs and interactive tables. We then discuss potential applications to NLP tasks and to linguistic research.
%R 10.18653/v1/2023.lchange-1.7
%U https://aclanthology.org/2023.lchange-1.7
%U https://doi.org/10.18653/v1/2023.lchange-1.7
%P 66-75
Markdown (Informal)
[EvoSem: A database of polysemous cognate sets](https://aclanthology.org/2023.lchange-1.7) (Dehouck et al., LChange 2023)
ACL
- Mathieu Dehouck, Alex François, Siva Kalyan, Martial Pastor, and David Kletz. 2023. EvoSem: A database of polysemous cognate sets. In Proceedings of the 4th Workshop on Computational Approaches to Historical Language Change, pages 66–75, Singapore. Association for Computational Linguistics.