@inproceedings{cristea-etal-2021-automatic-discrimination,
title = "Automatic Discrimination between Inherited and Borrowed {L}atin Words in {R}omance Languages",
author = "Cristea, Alina Maria and
Dinu, Liviu P. and
Georgescu, Simona and
Mihai, Mihnea-Lucian and
Uban, Ana Sabina",
editor = "Moens, Marie-Francine and
Huang, Xuanjing and
Specia, Lucia and
Yih, Scott Wen-tau",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2021",
month = nov,
year = "2021",
address = "Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.findings-emnlp.243",
doi = "10.18653/v1/2021.findings-emnlp.243",
pages = "2845--2855",
abstract = "In this paper, we address the problem of automatically discriminating between inherited and borrowed Latin words. We introduce a new dataset and investigate the case of Romance languages (Romanian, Italian, French, Spanish, Portuguese and Catalan), where words directly inherited from Latin coexist with words borrowed from Latin, and explore whether automatic discrimination between them is possible. Having entered the language at a later stage, borrowed words are no longer subject to historical sound shift rules, hence they are presumably less eroded, which is why we expect them to have a different intrinsic structure distinguishable by computational means. We employ several machine learning models to automatically discriminate between inherited and borrowed words and compare their performance with various feature sets. We analyze the models{'} predictive power on two versions of the datasets, orthographic and phonetic. We also investigate whether prior knowledge of the etymon provides better results, employing n-gram character features extracted from the word-etymon pairs and from their alignment.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cristea-etal-2021-automatic-discrimination">
<titleInfo>
<title>Automatic Discrimination between Inherited and Borrowed Latin Words in Romance Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alina</namePart>
<namePart type="given">Maria</namePart>
<namePart type="family">Cristea</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liviu</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Dinu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simona</namePart>
<namePart type="family">Georgescu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mihnea-Lucian</namePart>
<namePart type="family">Mihai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ana</namePart>
<namePart type="given">Sabina</namePart>
<namePart type="family">Uban</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2021</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marie-Francine</namePart>
<namePart type="family">Moens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuanjing</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucia</namePart>
<namePart type="family">Specia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Scott</namePart>
<namePart type="given">Wen-tau</namePart>
<namePart type="family">Yih</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Punta Cana, Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we address the problem of automatically discriminating between inherited and borrowed Latin words. We introduce a new dataset and investigate the case of Romance languages (Romanian, Italian, French, Spanish, Portuguese and Catalan), where words directly inherited from Latin coexist with words borrowed from Latin, and explore whether automatic discrimination between them is possible. Having entered the language at a later stage, borrowed words are no longer subject to historical sound shift rules, hence they are presumably less eroded, which is why we expect them to have a different intrinsic structure distinguishable by computational means. We employ several machine learning models to automatically discriminate between inherited and borrowed words and compare their performance with various feature sets. We analyze the models’ predictive power on two versions of the datasets, orthographic and phonetic. We also investigate whether prior knowledge of the etymon provides better results, employing n-gram character features extracted from the word-etymon pairs and from their alignment.</abstract>
<identifier type="citekey">cristea-etal-2021-automatic-discrimination</identifier>
<identifier type="doi">10.18653/v1/2021.findings-emnlp.243</identifier>
<location>
<url>https://aclanthology.org/2021.findings-emnlp.243</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>2845</start>
<end>2855</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Automatic Discrimination between Inherited and Borrowed Latin Words in Romance Languages
%A Cristea, Alina Maria
%A Dinu, Liviu P.
%A Georgescu, Simona
%A Mihai, Mihnea-Lucian
%A Uban, Ana Sabina
%Y Moens, Marie-Francine
%Y Huang, Xuanjing
%Y Specia, Lucia
%Y Yih, Scott Wen-tau
%S Findings of the Association for Computational Linguistics: EMNLP 2021
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic
%F cristea-etal-2021-automatic-discrimination
%X In this paper, we address the problem of automatically discriminating between inherited and borrowed Latin words. We introduce a new dataset and investigate the case of Romance languages (Romanian, Italian, French, Spanish, Portuguese and Catalan), where words directly inherited from Latin coexist with words borrowed from Latin, and explore whether automatic discrimination between them is possible. Having entered the language at a later stage, borrowed words are no longer subject to historical sound shift rules, hence they are presumably less eroded, which is why we expect them to have a different intrinsic structure distinguishable by computational means. We employ several machine learning models to automatically discriminate between inherited and borrowed words and compare their performance with various feature sets. We analyze the models’ predictive power on two versions of the datasets, orthographic and phonetic. We also investigate whether prior knowledge of the etymon provides better results, employing n-gram character features extracted from the word-etymon pairs and from their alignment.
%R 10.18653/v1/2021.findings-emnlp.243
%U https://aclanthology.org/2021.findings-emnlp.243
%U https://doi.org/10.18653/v1/2021.findings-emnlp.243
%P 2845-2855
Markdown (Informal)
[Automatic Discrimination between Inherited and Borrowed Latin Words in Romance Languages](https://aclanthology.org/2021.findings-emnlp.243) (Cristea et al., Findings 2021)
ACL