@inproceedings{scherrer-2023-character,
title = "Character alignment methods for dialect-to-standard normalization",
author = "Scherrer, Yves",
editor = {Nicolai, Garrett and
Chodroff, Eleanor and
Mailhot, Frederic and
{\c{C}}{\"o}ltekin, {\c{C}}a{\u{g}}r{\i}},
booktitle = "Proceedings of the 20th SIGMORPHON workshop on Computational Research in Phonetics, Phonology, and Morphology",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.sigmorphon-1.12",
doi = "10.18653/v1/2023.sigmorphon-1.12",
pages = "110--116",
abstract = "This paper evaluates various character alignment methods on the task of sentence-level standardization of dialect transcriptions. We compare alignment methods from different scientific traditions (dialectometry, speech processing, machine translation) and apply them to Finnish, Norwegian and Swiss German dialect datasets. In the absence of gold alignments, we evaluate the methods on a set of characteristics that are deemed undesirable for the task. We find that trained alignment methods only show marginal benefits to simple Levenshtein distance. On this particular task, eflomal outperforms related methods such as GIZA++ or fast{\_}align by a large margin.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="scherrer-2023-character">
<titleInfo>
<title>Character alignment methods for dialect-to-standard normalization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yves</namePart>
<namePart type="family">Scherrer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th SIGMORPHON workshop on Computational Research in Phonetics, Phonology, and Morphology</title>
</titleInfo>
<name type="personal">
<namePart type="given">Garrett</namePart>
<namePart type="family">Nicolai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eleanor</namePart>
<namePart type="family">Chodroff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frederic</namePart>
<namePart type="family">Mailhot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Çağrı</namePart>
<namePart type="family">Çöltekin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper evaluates various character alignment methods on the task of sentence-level standardization of dialect transcriptions. We compare alignment methods from different scientific traditions (dialectometry, speech processing, machine translation) and apply them to Finnish, Norwegian and Swiss German dialect datasets. In the absence of gold alignments, we evaluate the methods on a set of characteristics that are deemed undesirable for the task. We find that trained alignment methods only show marginal benefits to simple Levenshtein distance. On this particular task, eflomal outperforms related methods such as GIZA++ or fast_align by a large margin.</abstract>
<identifier type="citekey">scherrer-2023-character</identifier>
<identifier type="doi">10.18653/v1/2023.sigmorphon-1.12</identifier>
<location>
<url>https://aclanthology.org/2023.sigmorphon-1.12</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>110</start>
<end>116</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Character alignment methods for dialect-to-standard normalization
%A Scherrer, Yves
%Y Nicolai, Garrett
%Y Chodroff, Eleanor
%Y Mailhot, Frederic
%Y Çöltekin, Çağrı
%S Proceedings of the 20th SIGMORPHON workshop on Computational Research in Phonetics, Phonology, and Morphology
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F scherrer-2023-character
%X This paper evaluates various character alignment methods on the task of sentence-level standardization of dialect transcriptions. We compare alignment methods from different scientific traditions (dialectometry, speech processing, machine translation) and apply them to Finnish, Norwegian and Swiss German dialect datasets. In the absence of gold alignments, we evaluate the methods on a set of characteristics that are deemed undesirable for the task. We find that trained alignment methods only show marginal benefits to simple Levenshtein distance. On this particular task, eflomal outperforms related methods such as GIZA++ or fast_align by a large margin.
%R 10.18653/v1/2023.sigmorphon-1.12
%U https://aclanthology.org/2023.sigmorphon-1.12
%U https://doi.org/10.18653/v1/2023.sigmorphon-1.12
%P 110-116
Markdown (Informal)
[Character alignment methods for dialect-to-standard normalization](https://aclanthology.org/2023.sigmorphon-1.12) (Scherrer, SIGMORPHON 2023)
ACL