@inproceedings{munhoz-etal-2026-f1,
title = "The F1 of Formula One: Applicability of Pre-trained {NER} Models to {B}razilian {TV} Interview Transcripts",
author = "Munhoz, Jo{\~a}o Pedro Gon{\c{c}}alves and
Oliveira, Luiz Felipe Guidorizzi de and
Belchior, Isabella and
Ruiz, Evandro Eduardo Seron and
Vale, Oto Ara{\'u}jo",
editor = "Souza, Marlo and
de-Dios-Flores, Iria and
Santos, Diana and
Freitas, Larissa and
Souza, Jackson Wilke da Cruz and
Ribeiro, Eug{\'e}nio",
booktitle = "Proceedings of the 17th International Conference on Computational Processing of {P}ortuguese ({PROPOR} 2026) - Vol. 2",
month = apr,
year = "2026",
address = "Salvador, Brazil",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.propor-2.40/",
pages = "303--311",
ISBN = "979-8-89176-387-6",
abstract = "Recorded interviews can capture their subjects' memories, perceptions, and emotions. When conducted with notable figures, they also have the potential to serve as a resource for interdisciplinary research, impacting various branches of science. In this work, we mark the beginning of a significant project analyzing interviews from the Roda Viva program, the longest-running interview show on Brazilian television. In this initial study, we examined six memorable interviews with six Brazilian Formula One drivers to compare the performance of two named entity recognition methods: a statistical-neural method and large language models, both evaluated against manual annotations. Still, it highlighted relevant qualitative distinctions: the statistical method showed a rigid dependence on capitalisation and lexical familiarity, leading to mechanical false positives and missing non-capitalised entities, while the LLM exhibited greater linguistic sensitivity, retrieving contextual entities and being robust to transcription errors, though it still produces false positives. The LLM-based model appears more promising due to its flexibility and the potential for refinement via instructions to filter for ambiguities, favouring the automation of social network extraction in the corpus."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="munhoz-etal-2026-f1">
<titleInfo>
<title>The F1 of Formula One: Applicability of Pre-trained NER Models to Brazilian TV Interview Transcripts</title>
</titleInfo>
<name type="personal">
<namePart type="given">João</namePart>
<namePart type="given">Pedro</namePart>
<namePart type="given">Gonçalves</namePart>
<namePart type="family">Munhoz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luiz</namePart>
<namePart type="given">Felipe</namePart>
<namePart type="given">Guidorizzi</namePart>
<namePart type="given">de</namePart>
<namePart type="family">Oliveira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isabella</namePart>
<namePart type="family">Belchior</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Evandro</namePart>
<namePart type="given">Eduardo</namePart>
<namePart type="given">Seron</namePart>
<namePart type="family">Ruiz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oto</namePart>
<namePart type="given">Araújo</namePart>
<namePart type="family">Vale</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 2</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marlo</namePart>
<namePart type="family">Souza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iria</namePart>
<namePart type="family">de-Dios-Flores</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diana</namePart>
<namePart type="family">Santos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Larissa</namePart>
<namePart type="family">Freitas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jackson</namePart>
<namePart type="given">Wilke</namePart>
<namePart type="given">da</namePart>
<namePart type="given">Cruz</namePart>
<namePart type="family">Souza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eugénio</namePart>
<namePart type="family">Ribeiro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Salvador, Brazil</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-387-6</identifier>
</relatedItem>
<abstract>Recorded interviews can capture their subjects’ memories, perceptions, and emotions. When conducted with notable figures, they also have the potential to serve as a resource for interdisciplinary research, impacting various branches of science. In this work, we mark the beginning of a significant project analyzing interviews from the Roda Viva program, the longest-running interview show on Brazilian television. In this initial study, we examined six memorable interviews with six Brazilian Formula One drivers to compare the performance of two named entity recognition methods: a statistical-neural method and large language models, both evaluated against manual annotations. Still, it highlighted relevant qualitative distinctions: the statistical method showed a rigid dependence on capitalisation and lexical familiarity, leading to mechanical false positives and missing non-capitalised entities, while the LLM exhibited greater linguistic sensitivity, retrieving contextual entities and being robust to transcription errors, though it still produces false positives. The LLM-based model appears more promising due to its flexibility and the potential for refinement via instructions to filter for ambiguities, favouring the automation of social network extraction in the corpus.</abstract>
<identifier type="citekey">munhoz-etal-2026-f1</identifier>
<location>
<url>https://aclanthology.org/2026.propor-2.40/</url>
</location>
<part>
<date>2026-04</date>
<extent unit="page">
<start>303</start>
<end>311</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The F1 of Formula One: Applicability of Pre-trained NER Models to Brazilian TV Interview Transcripts
%A Munhoz, João Pedro Gonçalves
%A Oliveira, Luiz Felipe Guidorizzi de
%A Belchior, Isabella
%A Ruiz, Evandro Eduardo Seron
%A Vale, Oto Araújo
%Y Souza, Marlo
%Y de-Dios-Flores, Iria
%Y Santos, Diana
%Y Freitas, Larissa
%Y Souza, Jackson Wilke da Cruz
%Y Ribeiro, Eugénio
%S Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 2
%D 2026
%8 April
%I Association for Computational Linguistics
%C Salvador, Brazil
%@ 979-8-89176-387-6
%F munhoz-etal-2026-f1
%X Recorded interviews can capture their subjects’ memories, perceptions, and emotions. When conducted with notable figures, they also have the potential to serve as a resource for interdisciplinary research, impacting various branches of science. In this work, we mark the beginning of a significant project analyzing interviews from the Roda Viva program, the longest-running interview show on Brazilian television. In this initial study, we examined six memorable interviews with six Brazilian Formula One drivers to compare the performance of two named entity recognition methods: a statistical-neural method and large language models, both evaluated against manual annotations. Still, it highlighted relevant qualitative distinctions: the statistical method showed a rigid dependence on capitalisation and lexical familiarity, leading to mechanical false positives and missing non-capitalised entities, while the LLM exhibited greater linguistic sensitivity, retrieving contextual entities and being robust to transcription errors, though it still produces false positives. The LLM-based model appears more promising due to its flexibility and the potential for refinement via instructions to filter for ambiguities, favouring the automation of social network extraction in the corpus.
%U https://aclanthology.org/2026.propor-2.40/
%P 303-311
Markdown (Informal)
[The F1 of Formula One: Applicability of Pre-trained NER Models to Brazilian TV Interview Transcripts](https://aclanthology.org/2026.propor-2.40/) (Munhoz et al., PROPOR 2026)
ACL