% ACL Anthology record for a paper in the LaTeCH-CLfL 2024 workshop proceedings.
% The acronyms SIGHUM and LaTeCH-CLfL in booktitle are brace-protected as whole
% words so that bibliography styles which recase booktitle keep their capitals.
% Field values are brace-delimited because the abstract contains nested brace
% groups around its quotation marks.
@inproceedings{messner-lippincott-2024-pairing,
  title     = {Pairing Orthographically Variant Literary Words to Standard Equivalents Using Neural Edit Distance Models},
  author    = {Messner, Craig and
               Lippincott, Thomas},
  editor    = {Bizzoni, Yuri and
               Degaetano-Ortlieb, Stefania and
               Kazantseva, Anna and
               Szpakowicz, Stan},
  booktitle = {Proceedings of the 8th Joint {SIGHUM} Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature ({LaTeCH-CLfL} 2024)},
  month     = mar,
  year      = {2024},
  address   = {St. Julians, Malta},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.latechclfl-1.26},
  pages     = {264--269},
  abstract  = {We present a novel corpus consisting of orthographically variant words found in works of 19th century U.S. literature annotated with their corresponding {``}standard{''} word pair. We train a set of neural edit distance models to pair these variants with their standard forms, and compare the performance of these models to the performance of a set of neural edit distance models trained on a corpus of orthographic errors made by L2 English learners. Finally, we analyze the relative performance of these models in the light of different negative training sample generation strategies, and offer concluding remarks on the unique challenge literary orthographic variation poses to string pairing methodologies.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper with citekey messner-lippincott-2024-pairing. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="messner-lippincott-2024-pairing">
    <!-- Title of the paper itself. -->
    <titleInfo>
      <title>Pairing Orthographically Variant Literary Words to Standard Equivalents Using Neural Edit Distance Models</title>
    </titleInfo>
    <!-- Paper authors, in listed order. -->
    <name type="personal">
      <namePart type="given">Craig</namePart>
      <namePart type="family">Messner</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Thomas</namePart>
      <namePart type="family">Lippincott</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 8th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature (LaTeCH-CLfL 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yuri</namePart>
        <namePart type="family">Bizzoni</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stefania</namePart>
        <namePart type="family">Degaetano-Ortlieb</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Anna</namePart>
        <namePart type="family">Kazantseva</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stan</namePart>
        <namePart type="family">Szpakowicz</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">St. Julians, Malta</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We present a novel corpus consisting of orthographically variant words found in works of 19th century U.S. literature annotated with their corresponding “standard” word pair. We train a set of neural edit distance models to pair these variants with their standard forms, and compare the performance of these models to the performance of a set of neural edit distance models trained on a corpus of orthographic errors made by L2 English learners. Finally, we analyze the relative performance of these models in the light of different negative training sample generation strategies, and offer concluding remarks on the unique challenge literary orthographic variation poses to string pairing methodologies.</abstract>
    <!-- Identifiers and location of the paper. -->
    <identifier type="citekey">messner-lippincott-2024-pairing</identifier>
    <location>
      <url>https://aclanthology.org/2024.latechclfl-1.26</url>
    </location>
    <!-- Page extent of the paper within the host volume. -->
    <part>
      <date>2024-03</date>
      <extent unit="page">
        <start>264</start>
        <end>269</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Pairing Orthographically Variant Literary Words to Standard Equivalents Using Neural Edit Distance Models
%A Messner, Craig
%A Lippincott, Thomas
%Y Bizzoni, Yuri
%Y Degaetano-Ortlieb, Stefania
%Y Kazantseva, Anna
%Y Szpakowicz, Stan
%S Proceedings of the 8th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature (LaTeCH-CLfL 2024)
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julians, Malta
%F messner-lippincott-2024-pairing
%X We present a novel corpus consisting of orthographically variant words found in works of 19th century U.S. literature annotated with their corresponding “standard” word pair. We train a set of neural edit distance models to pair these variants with their standard forms, and compare the performance of these models to the performance of a set of neural edit distance models trained on a corpus of orthographic errors made by L2 English learners. Finally, we analyze the relative performance of these models in the light of different negative training sample generation strategies, and offer concluding remarks on the unique challenge literary orthographic variation poses to string pairing methodologies.
%U https://aclanthology.org/2024.latechclfl-1.26
%P 264-269
Markdown (Informal)
[Pairing Orthographically Variant Literary Words to Standard Equivalents Using Neural Edit Distance Models](https://aclanthology.org/2024.latechclfl-1.26) (Messner & Lippincott, LaTeCHCLfL-WS 2024)
ACL