@inproceedings{temesgen-etal-2025-cognate,
title = "Cognate Detection for Historical Language Reconstruction of Proto-Sabean Languages: the Case of {G}e`ez, {T}igrinya, and {A}mharic",
author = "Temesgen, Elleni Sisay and
Nigatu, Hellina Hailu and
Andargie, Fitsum Assamnew",
editor = "Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Di Eugenio, Barbara and
Schockaert, Steven",
booktitle = "Proceedings of the 31st International Conference on Computational Linguistics",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.coling-main.496/",
pages = "7415--7422",
abstract = "As languages evolve, we risk losing ancestral languages. In this paper, we explore Historical Language Reconstruction (HLR) for Proto-Sabean languages, starting with the identification of cognates{--}sets of words in different related languages that are derived from the same ancestral language. We (1) collect semantically related words in three Afro-Semitic languages from a three-way dictionary (2) work with linguists to identify cognates and reconstruct the proto-form of the cognates, (3) experiment with three automatic cognate detection methods and extract cognates from the semantically related words. We then experiment with in-context learning with GPT-4o to generate the proto-language from the cognates and use Sequence-to-Sequence (Seq2Seq) models for HLR."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="temesgen-etal-2025-cognate">
<titleInfo>
<title>Cognate Detection for Historical Language Reconstruction of Proto-Sabean Languages: the Case of Ge‘ez, Tigrinya, and Amharic</title>
</titleInfo>
<name type="personal">
<namePart type="given">Elleni</namePart>
<namePart type="given">Sisay</namePart>
<namePart type="family">Temesgen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hellina</namePart>
<namePart type="given">Hailu</namePart>
<namePart type="family">Nigatu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fitsum</namePart>
<namePart type="given">Assamnew</namePart>
<namePart type="family">Andargie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 31st International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Apidianaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hend</namePart>
<namePart type="family">Al-Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="family">Di Eugenio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Schockaert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>As languages evolve, we risk losing ancestral languages. In this paper, we explore Historical Language Reconstruction (HLR) for Proto-Sabean languages, starting with the identification of cognates–sets of words in different related languages that are derived from the same ancestral language. We (1) collect semantically related words in three Afro-Semitic languages from a three-way dictionary (2) work with linguists to identify cognates and reconstruct the proto-form of the cognates, (3) experiment with three automatic cognate detection methods and extract cognates from the semantically related words. We then experiment with in-context learning with GPT-4o to generate the proto-language from the cognates and use Sequence-to-Sequence (Seq2Seq) models for HLR.</abstract>
<identifier type="citekey">temesgen-etal-2025-cognate</identifier>
<location>
<url>https://aclanthology.org/2025.coling-main.496/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>7415</start>
<end>7422</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Cognate Detection for Historical Language Reconstruction of Proto-Sabean Languages: the Case of Ge‘ez, Tigrinya, and Amharic
%A Temesgen, Elleni Sisay
%A Nigatu, Hellina Hailu
%A Andargie, Fitsum Assamnew
%Y Rambow, Owen
%Y Wanner, Leo
%Y Apidianaki, Marianna
%Y Al-Khalifa, Hend
%Y Di Eugenio, Barbara
%Y Schockaert, Steven
%S Proceedings of the 31st International Conference on Computational Linguistics
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F temesgen-etal-2025-cognate
%X As languages evolve, we risk losing ancestral languages. In this paper, we explore Historical Language Reconstruction (HLR) for Proto-Sabean languages, starting with the identification of cognates–sets of words in different related languages that are derived from the same ancestral language. We (1) collect semantically related words in three Afro-Semitic languages from a three-way dictionary (2) work with linguists to identify cognates and reconstruct the proto-form of the cognates, (3) experiment with three automatic cognate detection methods and extract cognates from the semantically related words. We then experiment with in-context learning with GPT-4o to generate the proto-language from the cognates and use Sequence-to-Sequence (Seq2Seq) models for HLR.
%U https://aclanthology.org/2025.coling-main.496/
%P 7415-7422
Markdown (Informal)
[Cognate Detection for Historical Language Reconstruction of Proto-Sabean Languages: the Case of Ge‘ez, Tigrinya, and Amharic](https://aclanthology.org/2025.coling-main.496/) (Temesgen et al., COLING 2025)
ACL