% Conference-paper record (inproceedings) with citation key cameron-etal-2026-analysing.
% Case-sensitive words in title and booktitle are brace-protected as whole words
% so that sentence-casing styles leave them intact.
% NOTE(review): "address" appears to hold the conference location rather than the
% publisher's city; confirm this is what the target bibliography style expects.
@inproceedings{cameron-etal-2026-analysing,
  title     = {Analysing {LLMs} for spelling normalization of 18th century {Portuguese}},
  author    = {Cameron, Helena Freire and
               Paes, Aline and
               Olival, Fernanda and
               Vieira, Renata},
  editor    = {Souza, Marlo and
               de-Dios-Flores, Iria and
               Santos, Diana and
               Freitas, Larissa and
               Souza, Jackson Wilke da Cruz and
               Ribeiro, Eug{\'e}nio},
  booktitle = {Proceedings of the 17th International Conference on Computational Processing of {Portuguese} ({PROPOR} 2026) - Vol. 1},
  month     = apr,
  year      = {2026},
  address   = {Salvador, Brazil},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.propor-1.49/},
  pages     = {498--506},
  isbn      = {979-8-89176-387-6},
  abstract  = {This paper presents an evaluation of large language models (LLMs) applied to the task of normalizing eighteenth-century written texts. Several LLMs were employed to process texts in pre-contemporary spellings and update them according to contemporary Portuguese orthography. Their outputs were rigorously compared against a curated reference corpus. The findings indicate marked disparities in model performance, with the Portuguese-specialized model Sabi{\'a} demonstrating a statistically significant advantage over multilingual alternatives.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="cameron-etal-2026-analysing">
    <!-- Title of the paper -->
    <titleInfo>
      <title>Analysing LLMs for spelling normalization of 18th century Portuguese</title>
    </titleInfo>
    <!-- Authors, one name element per person -->
    <name type="personal">
      <namePart type="given">Helena</namePart>
      <namePart type="given">Freire</namePart>
      <namePart type="family">Cameron</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Aline</namePart>
      <namePart type="family">Paes</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Fernanda</namePart>
      <namePart type="family">Olival</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Renata</namePart>
      <namePart type="family">Vieira</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and ISBN -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Marlo</namePart>
        <namePart type="family">Souza</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Iria</namePart>
        <namePart type="family">de-Dios-Flores</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Diana</namePart>
        <namePart type="family">Santos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Larissa</namePart>
        <namePart type="family">Freitas</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jackson</namePart>
        <namePart type="given">Wilke</namePart>
        <namePart type="given">da</namePart>
        <namePart type="given">Cruz</namePart>
        <namePart type="family">Souza</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Eugénio</namePart>
        <namePart type="family">Ribeiro</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Salvador, Brazil</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-387-6</identifier>
    </relatedItem>
    <abstract>This paper presents an evaluation of large language models (LLMs) applied to the task of normalizing eighteenth-century written texts. Several LLMs were employed to process texts in pre-contemporary spellings and update them according to contemporary Portuguese orthography. Their outputs were rigorously compared against a curated reference corpus. The findings indicate marked disparities in model performance, with the Portuguese-specialized model Sabiá demonstrating a statistically significant advantage over multilingual alternatives.</abstract>
    <!-- Citation key and landing page for this record -->
    <identifier type="citekey">cameron-etal-2026-analysing</identifier>
    <location>
      <url>https://aclanthology.org/2026.propor-1.49/</url>
    </location>
    <!-- Page range of the paper within the host volume -->
    <part>
      <date>2026-04</date>
      <extent unit="page">
        <start>498</start>
        <end>506</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Analysing LLMs for spelling normalization of 18th century Portuguese
%A Cameron, Helena Freire
%A Paes, Aline
%A Olival, Fernanda
%A Vieira, Renata
%Y Souza, Marlo
%Y de-Dios-Flores, Iria
%Y Santos, Diana
%Y Freitas, Larissa
%Y Souza, Jackson Wilke da Cruz
%Y Ribeiro, Eugénio
%S Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1
%D 2026
%8 April
%I Association for Computational Linguistics
%C Salvador, Brazil
%@ 979-8-89176-387-6
%F cameron-etal-2026-analysing
%X This paper presents an evaluation of large language models (LLMs) applied to the task of normalizing eighteenth-century written texts. Several LLMs were employed to process texts in pre-contemporary spellings and update them according to contemporary Portuguese orthography. Their outputs were rigorously compared against a curated reference corpus. The findings indicate marked disparities in model performance, with the Portuguese-specialized model Sabiá demonstrating a statistically significant advantage over multilingual alternatives.
%U https://aclanthology.org/2026.propor-1.49/
%P 498-506
Markdown (Informal)
[Analysing LLMs for spelling normalization of 18th century Portuguese](https://aclanthology.org/2026.propor-1.49/) (Cameron et al., PROPOR 2026)
ACL
- Helena Freire Cameron, Aline Paes, Fernanda Olival, and Renata Vieira. 2026. Analysing LLMs for spelling normalization of 18th century Portuguese. In Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1, pages 498–506, Salvador, Brazil. Association for Computational Linguistics.