@article{castro-etal-2025-novel,
title = "A Novel Methodology for Enhancing Cross-language and Domain Adaptability in Temporal Expression Normalization",
author = "de Castro, Alejandro S{\'a}nchez and
Araujo, Lourdes and
Martinez-Romo, Juan",
journal = "Computational Linguistics",
volume = "51",
number = "4",
month = dec,
year = "2025",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2025.cl-4.7/",
doi = "10.1162/coli.a.12",
pages = "1303--1335",
abstract = "Accurate temporal expression normalization, the process of assigning a numerical value to a temporal expression, is essential for tasks such as timeline creation and temporal reasoning. While rule-based normalization systems are limited in adaptability across different domains and languages, deep-learning solutions in this area have not been extensively explored. An additional challenge is the scarcity of manually annotated corpora with temporal annotations. To address the adaptability limitations of current systems, we propose a highly adaptable methodology that can be applied to multiple domains and languages. This can be achieved by leveraging a multilingual Pre-trained Language Model (PTLM) with a fill-mask architecture, using a Value Intermediate Representation (VIR) where the temporal expression value format is adjusted to the fill-mask representation. Our approach involves a two-phase training process. Initially, the model is trained with a novel masking policy on a large English biomedical corpus that is automatically annotated with normalized temporal expressions, along with a complementary hand-crafted temporal expressions corpus. This addresses the lack of manually annotated data and helps to achieve sufficient capacity for adaptation to diverse domains or languages. In the second phase, we show how the model can be tailored to different domains and languages using various techniques, showcasing the versatility of the proposed methodology. This approach significantly outperforms existing systems."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="castro-etal-2025-novel">
<titleInfo>
<title>A Novel Methodology for Enhancing Cross-language and Domain Adaptability in Temporal Expression Normalization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alejandro</namePart>
<namePart type="given">Sánchez</namePart>
<namePart type="family">de Castro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lourdes</namePart>
<namePart type="family">Araujo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Martinez-Romo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>Accurate temporal expression normalization, the process of assigning a numerical value to a temporal expression, is essential for tasks such as timeline creation and temporal reasoning. While rule-based normalization systems are limited in adaptability across different domains and languages, deep-learning solutions in this area have not been extensively explored. An additional challenge is the scarcity of manually annotated corpora with temporal annotations. To address the adaptability limitations of current systems, we propose a highly adaptable methodology that can be applied to multiple domains and languages. This can be achieved by leveraging a multilingual Pre-trained Language Model (PTLM) with a fill-mask architecture, using a Value Intermediate Representation (VIR) where the temporal expression value format is adjusted to the fill-mask representation. Our approach involves a two-phase training process. Initially, the model is trained with a novel masking policy on a large English biomedical corpus that is automatically annotated with normalized temporal expressions, along with a complementary hand-crafted temporal expressions corpus. This addresses the lack of manually annotated data and helps to achieve sufficient capacity for adaptation to diverse domains or languages. In the second phase, we show how the model can be tailored to different domains and languages using various techniques, showcasing the versatility of the proposed methodology. This approach significantly outperforms existing systems.</abstract>
<identifier type="citekey">castro-etal-2025-novel</identifier>
<identifier type="doi">10.1162/coli.a.12</identifier>
<location>
<url>https://aclanthology.org/2025.cl-4.7/</url>
</location>
<part>
<date>2025-12</date>
<detail type="volume"><number>51</number></detail>
<detail type="issue"><number>4</number></detail>
<extent unit="page">
<start>1303</start>
<end>1335</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T A Novel Methodology for Enhancing Cross-language and Domain Adaptability in Temporal Expression Normalization
%A de Castro, Alejandro Sánchez
%A Araujo, Lourdes
%A Martinez-Romo, Juan
%J Computational Linguistics
%D 2025
%8 December
%V 51
%N 4
%I MIT Press
%C Cambridge, MA
%F castro-etal-2025-novel
%X Accurate temporal expression normalization, the process of assigning a numerical value to a temporal expression, is essential for tasks such as timeline creation and temporal reasoning. While rule-based normalization systems are limited in adaptability across different domains and languages, deep-learning solutions in this area have not been extensively explored. An additional challenge is the scarcity of manually annotated corpora with temporal annotations. To address the adaptability limitations of current systems, we propose a highly adaptable methodology that can be applied to multiple domains and languages. This can be achieved by leveraging a multilingual Pre-trained Language Model (PTLM) with a fill-mask architecture, using a Value Intermediate Representation (VIR) where the temporal expression value format is adjusted to the fill-mask representation. Our approach involves a two-phase training process. Initially, the model is trained with a novel masking policy on a large English biomedical corpus that is automatically annotated with normalized temporal expressions, along with a complementary hand-crafted temporal expressions corpus. This addresses the lack of manually annotated data and helps to achieve sufficient capacity for adaptation to diverse domains or languages. In the second phase, we show how the model can be tailored to different domains and languages using various techniques, showcasing the versatility of the proposed methodology. This approach significantly outperforms existing systems.
%R 10.1162/coli.a.12
%U https://aclanthology.org/2025.cl-4.7/
%U https://doi.org/10.1162/coli.a.12
%P 1303-1335
Markdown (Informal)
[A Novel Methodology for Enhancing Cross-language and Domain Adaptability in Temporal Expression Normalization](https://aclanthology.org/2025.cl-4.7/) (de Castro et al., CL 2025)
ACL