@inproceedings{borkakoty-espinosa-anke-2023-wikitide,
title = "{WIKITIDE}: A {W}ikipedia-Based Timestamped Definition Pairs Dataset",
author = "Borkakoty, Hsuvas and
Espinosa Anke, Luis",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing",
month = sep,
year = "2023",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2023.ranlp-1.23",
pages = "207--216",
abstract = "A fundamental challenge in the current NLP context, dominated by language models, comes from the inflexibility of current architectures to {``}learn{''} new information. While model-centric solutions like continual learning or parameter-efficient fine-tuning are available, the question still remains of how to reliably identify changes in language or in the world. In this paper, we propose WikiTiDe, a dataset derived from pairs of timestamped definitions extracted from Wikipedia. We argue that such resources can be helpful for accelerating diachronic NLP, specifically, for training models able to scan knowledge resources for core updates concerning a concept, an event, or a named entity. Our proposed end-to-end method is fully automatic and leverages a bootstrapping algorithm for gradually creating a high-quality dataset. Our results suggest that bootstrapping the seed version of WikiTiDe leads to better-fine-tuned models. We also leverage fine-tuned models in a number of downstream tasks, showing promising results with respect to competitive baselines.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="borkakoty-espinosa-anke-2023-wikitide">
<titleInfo>
<title>WIKITIDE: A Wikipedia-Based Timestamped Definition Pairs Dataset</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hsuvas</namePart>
<namePart type="family">Borkakoty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luis</namePart>
<namePart type="family">Espinosa Anke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>A fundamental challenge in the current NLP context, dominated by language models, comes from the inflexibility of current architectures to “learn” new information. While model-centric solutions like continual learning or parameter-efficient fine-tuning are available, the question still remains of how to reliably identify changes in language or in the world. In this paper, we propose WikiTiDe, a dataset derived from pairs of timestamped definitions extracted from Wikipedia. We argue that such resources can be helpful for accelerating diachronic NLP, specifically, for training models able to scan knowledge resources for core updates concerning a concept, an event, or a named entity. Our proposed end-to-end method is fully automatic and leverages a bootstrapping algorithm for gradually creating a high-quality dataset. Our results suggest that bootstrapping the seed version of WikiTiDe leads to better-fine-tuned models. We also leverage fine-tuned models in a number of downstream tasks, showing promising results with respect to competitive baselines.</abstract>
<identifier type="citekey">borkakoty-espinosa-anke-2023-wikitide</identifier>
<location>
<url>https://aclanthology.org/2023.ranlp-1.23</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>207</start>
<end>216</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T WIKITIDE: A Wikipedia-Based Timestamped Definition Pairs Dataset
%A Borkakoty, Hsuvas
%A Espinosa Anke, Luis
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing
%D 2023
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F borkakoty-espinosa-anke-2023-wikitide
%X A fundamental challenge in the current NLP context, dominated by language models, comes from the inflexibility of current architectures to “learn” new information. While model-centric solutions like continual learning or parameter-efficient fine-tuning are available, the question still remains of how to reliably identify changes in language or in the world. In this paper, we propose WikiTiDe, a dataset derived from pairs of timestamped definitions extracted from Wikipedia. We argue that such resources can be helpful for accelerating diachronic NLP, specifically, for training models able to scan knowledge resources for core updates concerning a concept, an event, or a named entity. Our proposed end-to-end method is fully automatic and leverages a bootstrapping algorithm for gradually creating a high-quality dataset. Our results suggest that bootstrapping the seed version of WikiTiDe leads to better-fine-tuned models. We also leverage fine-tuned models in a number of downstream tasks, showing promising results with respect to competitive baselines.
%U https://aclanthology.org/2023.ranlp-1.23
%P 207-216
Markdown (Informal)
[WIKITIDE: A Wikipedia-Based Timestamped Definition Pairs Dataset](https://aclanthology.org/2023.ranlp-1.23) (Borkakoty & Espinosa Anke, RANLP 2023)
ACL