@inproceedings{mulloni-pekar-2006-automatic,
title = "Automatic Detection of Orthographic Cues for Cognate Recognition",
author = "Mulloni, Andrea and
Pekar, Viktor",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Gangemi, Aldo and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Tapias, Daniel",
booktitle = "Proceedings of the Fifth International Conference on Language Resources and Evaluation ({LREC}{'}06)",
month = may,
year = "2006",
address = "Genoa, Italy",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2006/pdf/676_pdf.pdf",
abstract = "Present-day machine translation technologies crucially depend on the size and quality of lexical resources. Much of recent research in the area has been concerned with methods to build bilingual dictionaries automatically. In this paper we propose a methodology for the automatic detection of cognates between two languages based solely on the orthography of words. From a set of known cognates, the method induces rules capturing regularities of orthographic mutations that a word undergoes when migrating from one language into the other. The rules are then applied as a preprocessing step before measuring the orthographic similarity between putative cognates. As a result, the method allows to achieve an improvement in the F-measure of 11,86{\%} in comparison with detecting cognates based only on the edit distance between them.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mulloni-pekar-2006-automatic">
<titleInfo>
<title>Automatic Detection of Orthographic Cues for Cognate Recognition</title>
</titleInfo>
<name type="personal">
<namePart type="given">Andrea</namePart>
<namePart type="family">Mulloni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viktor</namePart>
<namePart type="family">Pekar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2006-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aldo</namePart>
<namePart type="family">Gangemi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Genoa, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Present-day machine translation technologies crucially depend on the size and quality of lexical resources. Much of recent research in the area has been concerned with methods to build bilingual dictionaries automatically. In this paper we propose a methodology for the automatic detection of cognates between two languages based solely on the orthography of words. From a set of known cognates, the method induces rules capturing regularities of orthographic mutations that a word undergoes when migrating from one language into the other. The rules are then applied as a preprocessing step before measuring the orthographic similarity between putative cognates. As a result, the method allows to achieve an improvement in the F-measure of 11,86% in comparison with detecting cognates based only on the edit distance between them.</abstract>
<identifier type="citekey">mulloni-pekar-2006-automatic</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/676_pdf.pdf</url>
</location>
<part>
<date>2006-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Automatic Detection of Orthographic Cues for Cognate Recognition
%A Mulloni, Andrea
%A Pekar, Viktor
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Gangemi, Aldo
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Tapias, Daniel
%S Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06)
%D 2006
%8 May
%I European Language Resources Association (ELRA)
%C Genoa, Italy
%F mulloni-pekar-2006-automatic
%X Present-day machine translation technologies crucially depend on the size and quality of lexical resources. Much of recent research in the area has been concerned with methods to build bilingual dictionaries automatically. In this paper we propose a methodology for the automatic detection of cognates between two languages based solely on the orthography of words. From a set of known cognates, the method induces rules capturing regularities of orthographic mutations that a word undergoes when migrating from one language into the other. The rules are then applied as a preprocessing step before measuring the orthographic similarity between putative cognates. As a result, the method allows to achieve an improvement in the F-measure of 11,86% in comparison with detecting cognates based only on the edit distance between them.
%U http://www.lrec-conf.org/proceedings/lrec2006/pdf/676_pdf.pdf
Markdown (Informal)
[Automatic Detection of Orthographic Cues for Cognate Recognition](http://www.lrec-conf.org/proceedings/lrec2006/pdf/676_pdf.pdf) (Mulloni & Pekar, LREC 2006)
ACL