@inproceedings{lendvai-wick-2022-finetuning,
title = "Finetuning {L}atin {BERT} for Word Sense Disambiguation on the Thesaurus Linguae Latinae",
author = "Lendvai, Piroska and
Wick, Claudia",
editor = "Zock, Michael and
Chersoni, Emmanuele and
Hsu, Yu-Yin and
Santus, Enrico",
booktitle = "Proceedings of the Workshop on Cognitive Aspects of the Lexicon",
month = nov,
year = "2022",
address = "Taipei, Taiwan",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.cogalex-1.5",
doi = "10.18653/v1/2022.cogalex-1.5",
pages = "37--41",
abstract = "The Thesaurus Linguae Latinae (TLL) is a comprehensive monolingual dictionary that records contextualized meanings and usages of Latin words in antique sources at an unprecedented scale. We created a new dataset based on a subset of sense representations in the TLL, with which we finetuned the Latin-BERT neural language model (Bamman and Burns, 2020) on a supervised Word Sense Disambiguation task. We observe that the contextualized BERT representations finetuned on TLL data score better than static embeddings used in a bidirectional LSTM classifier on the same dataset, and that our per-lemma BERT models achieve higher and more robust performance than reported by Bamman and Burns (2020) based on data from a bilingual Latin dictionary. We demonstrate the differences in sense organizational principles between these two lexical resources, and report about our dataset construction and improved evaluation methodology.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="lendvai-wick-2022-finetuning">
    <titleInfo>
      <title>Finetuning Latin BERT for Word Sense Disambiguation on the Thesaurus Linguae Latinae</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Piroska</namePart>
      <namePart type="family">Lendvai</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Claudia</namePart>
      <namePart type="family">Wick</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Workshop on Cognitive Aspects of the Lexicon</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Michael</namePart>
        <namePart type="family">Zock</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Emmanuele</namePart>
        <namePart type="family">Chersoni</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yu-Yin</namePart>
        <namePart type="family">Hsu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Enrico</namePart>
        <namePart type="family">Santus</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Taipei, Taiwan</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The Thesaurus Linguae Latinae (TLL) is a comprehensive monolingual dictionary that records contextualized meanings and usages of Latin words in ancient sources at an unprecedented scale. We created a new dataset based on a subset of sense representations in the TLL, with which we finetuned the Latin-BERT neural language model (Bamman and Burns, 2020) on a supervised Word Sense Disambiguation task. We observe that the contextualized BERT representations finetuned on TLL data score better than static embeddings used in a bidirectional LSTM classifier on the same dataset, and that our per-lemma BERT models achieve higher and more robust performance than that reported by Bamman and Burns (2020) based on data from a bilingual Latin dictionary. We demonstrate the differences in sense organization between these two lexical resources, and report on our dataset construction and improved evaluation methodology.</abstract>
    <identifier type="citekey">lendvai-wick-2022-finetuning</identifier>
    <identifier type="doi">10.18653/v1/2022.cogalex-1.5</identifier>
    <location>
      <url>https://aclanthology.org/2022.cogalex-1.5</url>
    </location>
    <part>
      <date>2022-11</date>
      <extent unit="page">
        <start>37</start>
        <end>41</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Finetuning Latin BERT for Word Sense Disambiguation on the Thesaurus Linguae Latinae
%A Lendvai, Piroska
%A Wick, Claudia
%Y Zock, Michael
%Y Chersoni, Emmanuele
%Y Hsu, Yu-Yin
%Y Santus, Enrico
%S Proceedings of the Workshop on Cognitive Aspects of the Lexicon
%D 2022
%8 November
%I Association for Computational Linguistics
%C Taipei, Taiwan
%F lendvai-wick-2022-finetuning
%X The Thesaurus Linguae Latinae (TLL) is a comprehensive monolingual dictionary that records contextualized meanings and usages of Latin words in ancient sources at an unprecedented scale. We created a new dataset based on a subset of sense representations in the TLL, with which we finetuned the Latin-BERT neural language model (Bamman and Burns, 2020) on a supervised Word Sense Disambiguation task. We observe that the contextualized BERT representations finetuned on TLL data score better than static embeddings used in a bidirectional LSTM classifier on the same dataset, and that our per-lemma BERT models achieve higher and more robust performance than that reported by Bamman and Burns (2020) based on data from a bilingual Latin dictionary. We demonstrate the differences in sense organization between these two lexical resources, and report on our dataset construction and improved evaluation methodology.
%R 10.18653/v1/2022.cogalex-1.5
%U https://aclanthology.org/2022.cogalex-1.5
%U https://doi.org/10.18653/v1/2022.cogalex-1.5
%P 37-41
Markdown (Informal)
[Finetuning Latin BERT for Word Sense Disambiguation on the Thesaurus Linguae Latinae](https://aclanthology.org/2022.cogalex-1.5) (Lendvai & Wick, CogALex 2022)
ACL
Piroska Lendvai and Claudia Wick. 2022. Finetuning Latin BERT for Word Sense Disambiguation on the Thesaurus Linguae Latinae. In Proceedings of the Workshop on Cognitive Aspects of the Lexicon, pages 37–41, Taipei, Taiwan. Association for Computational Linguistics.
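
The abstract describes finetuning per-lemma BERT classifiers for supervised Word Sense Disambiguation on TLL sense labels. As a minimal illustration only (this is not the authors' released code), the sketch below shows how such a per-lemma sense classifier could be applied at inference time, assuming the Hugging Face Transformers API; the checkpoint path and sense label set are hypothetical placeholders.

```python
# Minimal sketch: per-lemma Word Sense Disambiguation framed as sequence
# classification, in the spirit of the paper's setup. MODEL_PATH and SENSES
# are placeholders, not the authors' released artifacts.
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

MODEL_PATH = "path/to/latin-bert-finetuned-for-this-lemma"  # hypothetical checkpoint
SENSES = ["TLL sense I", "TLL sense II", "TLL sense III"]   # hypothetical label set

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_PATH, num_labels=len(SENSES)
)
model.eval()

def disambiguate(context: str) -> str:
    """Predict the TLL sense of the target lemma occurring in `context`."""
    inputs = tokenizer(context, truncation=True, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits  # shape: (1, len(SENSES))
    return SENSES[int(logits.argmax(dim=-1))]

# Example: classify one attestation of the lemma in its sentence context.
print(disambiguate("arma uirumque cano, Troiae qui primus ab oris"))
```

One classifier per lemma mirrors the per-lemma models the abstract reports on; a single joint model over all lemmas and senses would be a possible alternative design, but is not what the paper evaluates.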