@inproceedings{marr-mortensen-2020-computerized,
title = "Computerized Forward Reconstruction for Analysis in Diachronic Phonology, and {L}atin to {F}rench Reflex Prediction",
author = "Marr, Clayton and
Mortensen, David R.",
editor = "Sprugnoli, Rachele and
Passarotti, Marco",
booktitle = "Proceedings of LT4HALA 2020 - 1st Workshop on Language Technologies for Historical and Ancient Languages",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/2020.lt4hala-1.5",
pages = "28--36",
abstract = "Traditionally, historical phonologists have relied on tedious manual derivations to calibrate the sequences of sound changes that shaped the phonological evolution of languages. However, humans are prone to errors, and cannot track thousands of parallel word derivations in any efficient manner. We propose to instead automatically derive each lexical item in parallel, and we demonstrate forward reconstruction as both a computational task with metrics to optimize, and as an empirical tool for inquiry. For this end we present DiaSim, a user-facing application that simulates {``}cascades{''} of diachronic developments over a language{'}s lexicon and provides diagnostics for {``}debugging{''} those cascades. We test our methodology on a Latin-to-French reflex prediction task, using a newly compiled dataset FLLex with 1368 paired Latin/French forms. We also present, FLLAPS, which maps 310 Latin reflexes through five stages until Modern French, derived from Pope (1934){'}s sound tables. Our publicly available rule cascades include the baselines BaseCLEF and BaseCLEF*, representing the received view of Latin to French development, and DiaCLEF, build by incremental corrections to BaseCLEF aided by DiaSim{'}s diagnostics. DiaCLEF vastly outperforms the baselines, improving final accuracy on FLLex from 3.2{\%}to 84.9{\%}, and similar improvements across FLLAPS{'} stages.",
language = "English",
ISBN = "979-10-95546-53-5",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="marr-mortensen-2020-computerized">
<titleInfo>
<title>Computerized Forward Reconstruction for Analysis in Diachronic Phonology, and Latin to French Reflex Prediction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Clayton</namePart>
<namePart type="family">Marr</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="given">R</namePart>
<namePart type="family">Mortensen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of LT4HALA 2020 - 1st Workshop on Language Technologies for Historical and Ancient Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rachele</namePart>
<namePart type="family">Sprugnoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Passarotti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-53-5</identifier>
</relatedItem>
<abstract>Traditionally, historical phonologists have relied on tedious manual derivations to calibrate the sequences of sound changes that shaped the phonological evolution of languages. However, humans are prone to errors, and cannot track thousands of parallel word derivations in any efficient manner. We propose to instead automatically derive each lexical item in parallel, and we demonstrate forward reconstruction as both a computational task with metrics to optimize, and as an empirical tool for inquiry. For this end we present DiaSim, a user-facing application that simulates “cascades” of diachronic developments over a language’s lexicon and provides diagnostics for “debugging” those cascades. We test our methodology on a Latin-to-French reflex prediction task, using a newly compiled dataset FLLex with 1368 paired Latin/French forms. We also present, FLLAPS, which maps 310 Latin reflexes through five stages until Modern French, derived from Pope (1934)’s sound tables. Our publicly available rule cascades include the baselines BaseCLEF and BaseCLEF*, representing the received view of Latin to French development, and DiaCLEF, build by incremental corrections to BaseCLEF aided by DiaSim’s diagnostics. DiaCLEF vastly outperforms the baselines, improving final accuracy on FLLex from 3.2%to 84.9%, and similar improvements across FLLAPS’ stages.</abstract>
<identifier type="citekey">marr-mortensen-2020-computerized</identifier>
<location>
<url>https://aclanthology.org/2020.lt4hala-1.5</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>28</start>
<end>36</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Computerized Forward Reconstruction for Analysis in Diachronic Phonology, and Latin to French Reflex Prediction
%A Marr, Clayton
%A Mortensen, David R.
%Y Sprugnoli, Rachele
%Y Passarotti, Marco
%S Proceedings of LT4HALA 2020 - 1st Workshop on Language Technologies for Historical and Ancient Languages
%D 2020
%8 May
%I European Language Resources Association (ELRA)
%C Marseille, France
%@ 979-10-95546-53-5
%G English
%F marr-mortensen-2020-computerized
%X Traditionally, historical phonologists have relied on tedious manual derivations to calibrate the sequences of sound changes that shaped the phonological evolution of languages. However, humans are prone to errors, and cannot track thousands of parallel word derivations in any efficient manner. We propose to instead automatically derive each lexical item in parallel, and we demonstrate forward reconstruction as both a computational task with metrics to optimize, and as an empirical tool for inquiry. For this end we present DiaSim, a user-facing application that simulates “cascades” of diachronic developments over a language’s lexicon and provides diagnostics for “debugging” those cascades. We test our methodology on a Latin-to-French reflex prediction task, using a newly compiled dataset FLLex with 1368 paired Latin/French forms. We also present, FLLAPS, which maps 310 Latin reflexes through five stages until Modern French, derived from Pope (1934)’s sound tables. Our publicly available rule cascades include the baselines BaseCLEF and BaseCLEF*, representing the received view of Latin to French development, and DiaCLEF, build by incremental corrections to BaseCLEF aided by DiaSim’s diagnostics. DiaCLEF vastly outperforms the baselines, improving final accuracy on FLLex from 3.2%to 84.9%, and similar improvements across FLLAPS’ stages.
%U https://aclanthology.org/2020.lt4hala-1.5
%P 28-36
Markdown (Informal)
[Computerized Forward Reconstruction for Analysis in Diachronic Phonology, and Latin to French Reflex Prediction](https://aclanthology.org/2020.lt4hala-1.5) (Marr & Mortensen, LT4HALA 2020)
ACL