@inproceedings{riemenschneider-2025-beyond,
title = "Beyond Base Predictors: Using {LLM}s to Resolve Ambiguities in {A}kkadian Lemmatization",
author = "Riemenschneider, Frederick",
editor = "Anderson, Adam and
Gordin, Shai and
Li, Bin and
Liu, Yudong and
Passarotti, Marco C. and
Sprugnoli, Rachele",
booktitle = "Proceedings of the Second Workshop on Ancient Language Processing",
month = may,
year = "2025",
address = "The Albuquerque Convention Center, Laguna",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.alp-1.30/",
doi = "10.18653/v1/2025.alp-1.30",
pages = "226--231",
ISBN = "979-8-89176-235-0",
abstract = "We present a hybrid approach for Akkadian lemmatization in the EvaCun 2025 Shared Task that combines traditional NLP techniques with large language models (LLMs). Our system employs three Base Predictors{--}a dictionary lookup and two T5 models{--}to establish initial lemma candidates. For cases where these predictors disagree (18.72{\%} of instances), we implement an LLM Resolution module, enhanced with direct access to the electronic Babylonian Library (eBL) dictionary entries. This module includes a Predictor component that generates initial lemma predictions based on dictionary information, and a Validator component that refines these predictions through contextual reasoning. Error analysis reveals that the system struggles most with small differences (like capitalization) and certain ambiguous logograms (like BI). Our work demonstrates the benefits of combining traditional NLP approaches with the reasoning capabilities of LLMs when provided with appropriate domain knowledge."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="riemenschneider-2025-beyond">
<titleInfo>
<title>Beyond Base Predictors: Using LLMs to Resolve Ambiguities in Akkadian Lemmatization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Frederick</namePart>
<namePart type="family">Riemenschneider</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Ancient Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Adam</namePart>
<namePart type="family">Anderson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shai</namePart>
<namePart type="family">Gordin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bin</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yudong</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="given">C</namePart>
<namePart type="family">Passarotti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rachele</namePart>
<namePart type="family">Sprugnoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">The Albuquerque Convention Center, Laguna</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-235-0</identifier>
</relatedItem>
<abstract>We present a hybrid approach for Akkadian lemmatization in the EvaCun 2025 Shared Task that combines traditional NLP techniques with large language models (LLMs). Our system employs three Base Predictors–a dictionary lookup and two T5 models–to establish initial lemma candidates. For cases where these predictors disagree (18.72% of instances), we implement an LLM Resolution module, enhanced with direct access to the electronic Babylonian Library (eBL) dictionary entries. This module includes a Predictor component that generates initial lemma predictions based on dictionary information, and a Validator component that refines these predictions through contextual reasoning. Error analysis reveals that the system struggles most with small differences (like capitalization) and certain ambiguous logograms (like BI). Our work demonstrates the benefits of combining traditional NLP approaches with the reasoning capabilities of LLMs when provided with appropriate domain knowledge.</abstract>
<identifier type="citekey">riemenschneider-2025-beyond</identifier>
<identifier type="doi">10.18653/v1/2025.alp-1.30</identifier>
<location>
<url>https://aclanthology.org/2025.alp-1.30/</url>
</location>
<part>
<date>2025-05</date>
<extent unit="page">
<start>226</start>
<end>231</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Beyond Base Predictors: Using LLMs to Resolve Ambiguities in Akkadian Lemmatization
%A Riemenschneider, Frederick
%Y Anderson, Adam
%Y Gordin, Shai
%Y Li, Bin
%Y Liu, Yudong
%Y Passarotti, Marco C.
%Y Sprugnoli, Rachele
%S Proceedings of the Second Workshop on Ancient Language Processing
%D 2025
%8 May
%I Association for Computational Linguistics
%C The Albuquerque Convention Center, Laguna
%@ 979-8-89176-235-0
%F riemenschneider-2025-beyond
%X We present a hybrid approach for Akkadian lemmatization in the EvaCun 2025 Shared Task that combines traditional NLP techniques with large language models (LLMs). Our system employs three Base Predictors–a dictionary lookup and two T5 models–to establish initial lemma candidates. For cases where these predictors disagree (18.72% of instances), we implement an LLM Resolution module, enhanced with direct access to the electronic Babylonian Library (eBL) dictionary entries. This module includes a Predictor component that generates initial lemma predictions based on dictionary information, and a Validator component that refines these predictions through contextual reasoning. Error analysis reveals that the system struggles most with small differences (like capitalization) and certain ambiguous logograms (like BI). Our work demonstrates the benefits of combining traditional NLP approaches with the reasoning capabilities of LLMs when provided with appropriate domain knowledge.
%R 10.18653/v1/2025.alp-1.30
%U https://aclanthology.org/2025.alp-1.30/
%U https://doi.org/10.18653/v1/2025.alp-1.30
%P 226-231
Markdown (Informal)
[Beyond Base Predictors: Using LLMs to Resolve Ambiguities in Akkadian Lemmatization](https://aclanthology.org/2025.alp-1.30/) (Riemenschneider, ALP 2025)
ACL