@inproceedings{de-toni-etal-2022-entities,
title = "Entities, Dates, and Languages: Zero-Shot on Historical Texts with T0",
author = "De Toni, Francesco and
Akiki, Christopher and
De La Rosa, Javier and
Fourrier, Cl{\'e}mentine and
Manjavacas, Enrique and
Schweter, Stefan and
Van Strien, Daniel",
editor = "Fan, Angela and
Ilic, Suzana and
Wolf, Thomas and
Gall{\'e}, Matthias",
booktitle = "Proceedings of BigScience Episode {\#}5 -- Workshop on Challenges {\&} Perspectives in Creating Large Language Models",
month = may,
year = "2022",
address = "virtual+Dublin",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.bigscience-1.7",
doi = "10.18653/v1/2022.bigscience-1.7",
pages = "75--83",
abstract = "In this work, we explore whether the recently demonstrated zero-shot abilities of the T0 model extend to Named Entity Recognition for out-of-distribution languages and time periods. Using a historical newspaper corpus in 3 languages as test-bed, we use prompts to extract possible named entities. Our results show that a naive approach for prompt-based zero-shot multilingual Named Entity Recognition is error-prone, but highlights the potential of such an approach for historical languages lacking labeled datasets. Moreover, we also find that T0-like models can be probed to predict the publication date and language of a document, which could be very relevant for the study of historical texts.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="de-toni-etal-2022-entities">
<titleInfo>
<title>Entities, Dates, and Languages: Zero-Shot on Historical Texts with T0</title>
</titleInfo>
<name type="personal">
<namePart type="given">Francesco</namePart>
<namePart type="family">De Toni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Akiki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Javier</namePart>
<namePart type="family">De La Rosa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Clémentine</namePart>
<namePart type="family">Fourrier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Enrique</namePart>
<namePart type="family">Manjavacas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stefan</namePart>
<namePart type="family">Schweter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Van Strien</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of BigScience Episode #5 – Workshop on Challenges &amp; Perspectives in Creating Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Angela</namePart>
<namePart type="family">Fan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Suzana</namePart>
<namePart type="family">Ilic</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thomas</namePart>
<namePart type="family">Wolf</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthias</namePart>
<namePart type="family">Gallé</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">virtual+Dublin</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this work, we explore whether the recently demonstrated zero-shot abilities of the T0 model extend to Named Entity Recognition for out-of-distribution languages and time periods. Using a historical newspaper corpus in 3 languages as test-bed, we use prompts to extract possible named entities. Our results show that a naive approach for prompt-based zero-shot multilingual Named Entity Recognition is error-prone, but highlights the potential of such an approach for historical languages lacking labeled datasets. Moreover, we also find that T0-like models can be probed to predict the publication date and language of a document, which could be very relevant for the study of historical texts.</abstract>
<identifier type="citekey">de-toni-etal-2022-entities</identifier>
<identifier type="doi">10.18653/v1/2022.bigscience-1.7</identifier>
<location>
<url>https://aclanthology.org/2022.bigscience-1.7</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>75</start>
<end>83</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Entities, Dates, and Languages: Zero-Shot on Historical Texts with T0
%A De Toni, Francesco
%A Akiki, Christopher
%A De La Rosa, Javier
%A Fourrier, Clémentine
%A Manjavacas, Enrique
%A Schweter, Stefan
%A Van Strien, Daniel
%Y Fan, Angela
%Y Ilic, Suzana
%Y Wolf, Thomas
%Y Gallé, Matthias
%S Proceedings of BigScience Episode #5 – Workshop on Challenges & Perspectives in Creating Large Language Models
%D 2022
%8 May
%I Association for Computational Linguistics
%C virtual+Dublin
%F de-toni-etal-2022-entities
%X In this work, we explore whether the recently demonstrated zero-shot abilities of the T0 model extend to Named Entity Recognition for out-of-distribution languages and time periods. Using a historical newspaper corpus in 3 languages as test-bed, we use prompts to extract possible named entities. Our results show that a naive approach for prompt-based zero-shot multilingual Named Entity Recognition is error-prone, but highlights the potential of such an approach for historical languages lacking labeled datasets. Moreover, we also find that T0-like models can be probed to predict the publication date and language of a document, which could be very relevant for the study of historical texts.
%R 10.18653/v1/2022.bigscience-1.7
%U https://aclanthology.org/2022.bigscience-1.7
%U https://doi.org/10.18653/v1/2022.bigscience-1.7
%P 75-83
Markdown (Informal)
[Entities, Dates, and Languages: Zero-Shot on Historical Texts with T0](https://aclanthology.org/2022.bigscience-1.7) (De Toni et al., BigScience 2022)
ACL
- Francesco De Toni, Christopher Akiki, Javier De La Rosa, Clémentine Fourrier, Enrique Manjavacas, Stefan Schweter, and Daniel Van Strien. 2022. Entities, Dates, and Languages: Zero-Shot on Historical Texts with T0. In Proceedings of BigScience Episode #5 – Workshop on Challenges & Perspectives in Creating Large Language Models, pages 75–83, virtual+Dublin. Association for Computational Linguistics.