@inproceedings{babych-2018-development,
title = "Development and evaluation of phonological models for cognate identication",
author = "Babych, Bogdan",
editor = "P{\'e}rez-Ortiz, Juan Antonio and
S{\'a}nchez-Mart{\'\i}nez, Felipe and
Espl{\`a}-Gomis, Miquel and
Popovi{\'c}, Maja and
Rico, Celia and
Martins, Andr{\'e} and
Van den Bogaert, Joachim and
Forcada, Mikel L.",
booktitle = "Proceedings of the 21st Annual Conference of the European Association for Machine Translation",
month = may,
year = "2018",
address = "Alicante, Spain",
url = "https://aclanthology.org/2018.eamt-main.4",
pages = "61--68",
abstract = "The paper presents a methodology for the development and task-based evaluation of phonological models, which improve the accuracy of cognate terminology identification, but may potentially be used for other applications, such as transliteration or improving character-based NMT. Terminology translation remains a bottleneck for MT, especially for under-resourced languages and domains, and automated identification of cognate terms addresses this problem. The proposed phonological models explicitly represent distinctive phonological features for each character, such as acoustic types (e.g., vowel/ consonant, voiced/ unvoiced/ sonant), place and manner of articulation (closed/open, front/back vowel; plosive, fricative, or labial, dental, glottal consonant). The advantage of such representations is that they explicate information about characters{'} internal structure rather than treat them as elementary atomic units of comparison, placing graphemes into a feature space that provides additional information about their articulatory (pronunciation-based) or acoustic (soundbased) distances and similarity. The article presents experimental results of using the proposed phonological models for extracting cognate terminology with the phonologically aware Levenshtein edit distance, which for Top-1 cognate ranking metric outperforms the baseline character-based Levenshtein by 16.5{\%}. Project resources are released on: \url{https://github.com/bogdanbabych/cognates-phonology}",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="babych-2018-development">
<titleInfo>
<title>Development and evaluation of phonological models for cognate identication</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bogdan</namePart>
<namePart type="family">Babych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 21st Annual Conference of the European Association for Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="given">Antonio</namePart>
<namePart type="family">Pérez-Ortiz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Felipe</namePart>
<namePart type="family">Sánchez-Martínez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Miquel</namePart>
<namePart type="family">Esplà-Gomis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maja</namePart>
<namePart type="family">Popović</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Celia</namePart>
<namePart type="family">Rico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">André</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joachim</namePart>
<namePart type="family">Van den Bogaert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mikel</namePart>
<namePart type="given">L</namePart>
<namePart type="family">Forcada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<place>
<placeTerm type="text">Alicante, Spain</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The paper presents a methodology for the development and task-based evaluation of phonological models, which improve the accuracy of cognate terminology identification, but may potentially be used for other applications, such as transliteration or improving character-based NMT. Terminology translation remains a bottleneck for MT, especially for under-resourced languages and domains, and automated identification of cognate terms addresses this problem. The proposed phonological models explicitly represent distinctive phonological features for each character, such as acoustic types (e.g., vowel/ consonant, voiced/ unvoiced/ sonant), place and manner of articulation (closed/open, front/back vowel; plosive, fricative, or labial, dental, glottal consonant). The advantage of such representations is that they explicate information about characters’ internal structure rather than treat them as elementary atomic units of comparison, placing graphemes into a feature space that provides additional information about their articulatory (pronunciation-based) or acoustic (soundbased) distances and similarity. The article presents experimental results of using the proposed phonological models for extracting cognate terminology with the phonologically aware Levenshtein edit distance, which for Top-1 cognate ranking metric outperforms the baseline character-based Levenshtein by 16.5%. Project resources are released on: https://github.com/bogdanbabych/cognates-phonology</abstract>
<identifier type="citekey">babych-2018-development</identifier>
<location>
<url>https://aclanthology.org/2018.eamt-main.4</url>
</location>
<part>
<date>2018-05</date>
<extent unit="page">
<start>61</start>
<end>68</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Development and evaluation of phonological models for cognate identication
%A Babych, Bogdan
%Y Pérez-Ortiz, Juan Antonio
%Y Sánchez-Martínez, Felipe
%Y Esplà-Gomis, Miquel
%Y Popović, Maja
%Y Rico, Celia
%Y Martins, André
%Y Van den Bogaert, Joachim
%Y Forcada, Mikel L.
%S Proceedings of the 21st Annual Conference of the European Association for Machine Translation
%D 2018
%8 May
%C Alicante, Spain
%F babych-2018-development
%X The paper presents a methodology for the development and task-based evaluation of phonological models, which improve the accuracy of cognate terminology identification, but may potentially be used for other applications, such as transliteration or improving character-based NMT. Terminology translation remains a bottleneck for MT, especially for under-resourced languages and domains, and automated identification of cognate terms addresses this problem. The proposed phonological models explicitly represent distinctive phonological features for each character, such as acoustic types (e.g., vowel/ consonant, voiced/ unvoiced/ sonant), place and manner of articulation (closed/open, front/back vowel; plosive, fricative, or labial, dental, glottal consonant). The advantage of such representations is that they explicate information about characters’ internal structure rather than treat them as elementary atomic units of comparison, placing graphemes into a feature space that provides additional information about their articulatory (pronunciation-based) or acoustic (soundbased) distances and similarity. The article presents experimental results of using the proposed phonological models for extracting cognate terminology with the phonologically aware Levenshtein edit distance, which for Top-1 cognate ranking metric outperforms the baseline character-based Levenshtein by 16.5%. Project resources are released on: https://github.com/bogdanbabych/cognates-phonology
%U https://aclanthology.org/2018.eamt-main.4
%P 61-68
Markdown (Informal)
[Development and evaluation of phonological models for cognate identication](https://aclanthology.org/2018.eamt-main.4) (Babych, EAMT 2018)
ACL