@inproceedings{chatterjee-etal-2026-lost,
title = "Lost in Translation? Exploring the Shift in Grammatical Gender from {L}atin to {O}ccitan",
author = {Chatterjee, Ahan and
Sch{\"o}ffel, Matthias and
A{\ss}enmacher, Matthias and
Wiedner, Marinus and
Garces Arias, Esteban},
editor = {Hamilton, Sil and
{\"O}hman, Emily and
Hicke, Rebecca M. M. and
Bizzoni, Yuri and
Bax, Axel and
Matthews, Jacob A. and
H{\"a}m{\"a}l{\"a}inen, Mika},
booktitle = "Proceedings of the 6th International Conference on Natural Language Processing for the Digital Humanities",
month = jul,
year = "2026",
address = "San Diego, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.nlp4dh-1.26/",
pages = "276--296",
ISBN = "979-8-89176-427-9",
abstract = "The diachronic evolution from Latin to the Romance languages involved a restructuring of the grammatical gender system from a tripartite configuration (masculine, feminine, neuter) to a bipartite one (masculine, feminine). In this work, we introduce an interpretable deep learning framework to investigate this phenomenon at both lexical and contextual levels. First, we show that conventional tokenization strategies are insufficiently robust for this low-resource historical setting, and that our proposed tokenizer improves performance over these baselines. At the lexical level, we evaluate the contribution of morphological features to gender prediction. At the contextual level, we quantify the contributions of different part-of-speech categories to grammatical gender prediction. Together, these analyses characterize the distribution of gender information between the lemma and its sentential context. We make our codebase, datasets, and results publicly available."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chatterjee-etal-2026-lost">
<titleInfo>
<title>Lost in Translation? Exploring the Shift in Grammatical Gender from Latin to Occitan</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ahan</namePart>
<namePart type="family">Chatterjee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthias</namePart>
<namePart type="family">Schöffel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthias</namePart>
<namePart type="family">Aßenmacher</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marinus</namePart>
<namePart type="family">Wiedner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Esteban</namePart>
<namePart type="family">Garces Arias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 6th International Conference on Natural Language Processing for the Digital Humanities</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sil</namePart>
<namePart type="family">Hamilton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emily</namePart>
<namePart type="family">Öhman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rebecca</namePart>
<namePart type="given">M</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Hicke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuri</namePart>
<namePart type="family">Bizzoni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Axel</namePart>
<namePart type="family">Bax</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jacob</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Matthews</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mika</namePart>
<namePart type="family">Hämäläinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-427-9</identifier>
</relatedItem>
<abstract>The diachronic evolution from Latin to the Romance languages involved a restructuring of the grammatical gender system from a tripartite configuration (masculine, feminine, neuter) to a bipartite one (masculine, feminine). In this work, we introduce an interpretable deep learning framework to investigate this phenomenon at both lexical and contextual levels. First, we show that conventional tokenization strategies are insufficiently robust for this low-resource historical setting, and that our proposed tokenizer improves performance over these baselines. At the lexical level, we evaluate the contribution of morphological features to gender prediction. At the contextual level, we quantify the contributions of different part-of-speech categories to grammatical gender prediction. Together, these analyses characterize the distribution of gender information between the lemma and its sentential context. We make our codebase, datasets, and results publicly available.</abstract>
<identifier type="citekey">chatterjee-etal-2026-lost</identifier>
<location>
<url>https://aclanthology.org/2026.nlp4dh-1.26/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>276</start>
<end>296</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Lost in Translation? Exploring the Shift in Grammatical Gender from Latin to Occitan
%A Chatterjee, Ahan
%A Schöffel, Matthias
%A Aßenmacher, Matthias
%A Wiedner, Marinus
%A Garces Arias, Esteban
%Y Hamilton, Sil
%Y Öhman, Emily
%Y Hicke, Rebecca M. M.
%Y Bizzoni, Yuri
%Y Bax, Axel
%Y Matthews, Jacob A.
%Y Hämäläinen, Mika
%S Proceedings of the 6th International Conference on Natural Language Processing for the Digital Humanities
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, USA
%@ 979-8-89176-427-9
%F chatterjee-etal-2026-lost
%X The diachronic evolution from Latin to the Romance languages involved a restructuring of the grammatical gender system from a tripartite configuration (masculine, feminine, neuter) to a bipartite one (masculine, feminine). In this work, we introduce an interpretable deep learning framework to investigate this phenomenon at both lexical and contextual levels. First, we show that conventional tokenization strategies are insufficiently robust for this low-resource historical setting, and that our proposed tokenizer improves performance over these baselines. At the lexical level, we evaluate the contribution of morphological features to gender prediction. At the contextual level, we quantify the contributions of different part-of-speech categories to grammatical gender prediction. Together, these analyses characterize the distribution of gender information between the lemma and its sentential context. We make our codebase, datasets, and results publicly available.
%U https://aclanthology.org/2026.nlp4dh-1.26/
%P 276-296
Markdown (Informal)
[Lost in Translation? Exploring the Shift in Grammatical Gender from Latin to Occitan](https://aclanthology.org/2026.nlp4dh-1.26/) (Chatterjee et al., NLP4DH 2026)
ACL