@inproceedings{beloucif-etal-2023-using,
title = "Using {W}ikidata for Enhancing Compositionality in Pretrained Language Models",
author = "Beloucif, Meriem and
Bansal, Mihir and
Biemann, Chris",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing",
month = sep,
year = "2023",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2023.ranlp-1.19",
pages = "170--178",
abstract = "One of the many advantages of pre-trained language models (PLMs) such as BERT and RoBERTa is their flexibility and contextual nature. These features give PLMs strong capabilities for representing lexical semantics. However, PLMs seem incapable of capturing high-level semantics in terms of compositionally. We show that when augmented with the relevant semantic knowledge, PMLs learn to capture a higher degree of lexical compositionality. We annotate a large dataset from Wikidata highlighting a type of semantic inference that is easy for humans to understand but difficult for PLMs, like the correlation between age and date of birth. We use this resource for finetuning DistilBERT, BERT large and RoBERTa. Our results show that the performance of PLMs against the test data continuously improves when augmented with such a rich resource. Our results are corroborated by a consistent improvement over most GLUE benchmark natural language understanding tasks.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="beloucif-etal-2023-using">
<titleInfo>
<title>Using Wikidata for Enhancing Compositionality in Pretrained Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Meriem</namePart>
<namePart type="family">Beloucif</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mihir</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Biemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>One of the many advantages of pre-trained language models (PLMs) such as BERT and RoBERTa is their flexibility and contextual nature. These features give PLMs strong capabilities for representing lexical semantics. However, PLMs seem incapable of capturing high-level semantics in terms of compositionality. We show that, when augmented with the relevant semantic knowledge, PLMs learn to capture a higher degree of lexical compositionality. We annotate a large dataset from Wikidata highlighting a type of semantic inference that is easy for humans to understand but difficult for PLMs, such as the correlation between age and date of birth. We use this resource for fine-tuning DistilBERT, BERT-large and RoBERTa. Our results show that the performance of PLMs on the test data consistently improves when augmented with such a rich resource. Our results are corroborated by a consistent improvement on most GLUE benchmark natural language understanding tasks.</abstract>
<identifier type="citekey">beloucif-etal-2023-using</identifier>
<location>
<url>https://aclanthology.org/2023.ranlp-1.19</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>170</start>
<end>178</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Using Wikidata for Enhancing Compositionality in Pretrained Language Models
%A Beloucif, Meriem
%A Bansal, Mihir
%A Biemann, Chris
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing
%D 2023
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F beloucif-etal-2023-using
%X One of the many advantages of pre-trained language models (PLMs) such as BERT and RoBERTa is their flexibility and contextual nature. These features give PLMs strong capabilities for representing lexical semantics. However, PLMs seem incapable of capturing high-level semantics in terms of compositionality. We show that, when augmented with the relevant semantic knowledge, PLMs learn to capture a higher degree of lexical compositionality. We annotate a large dataset from Wikidata highlighting a type of semantic inference that is easy for humans to understand but difficult for PLMs, such as the correlation between age and date of birth. We use this resource for fine-tuning DistilBERT, BERT-large and RoBERTa. Our results show that the performance of PLMs on the test data consistently improves when augmented with such a rich resource. Our results are corroborated by a consistent improvement on most GLUE benchmark natural language understanding tasks.
%U https://aclanthology.org/2023.ranlp-1.19
%P 170-178
Markdown (Informal)
[Using Wikidata for Enhancing Compositionality in Pretrained Language Models](https://aclanthology.org/2023.ranlp-1.19) (Beloucif et al., RANLP 2023)
ACL
Meriem Beloucif, Mihir Bansal, and Chris Biemann. 2023. Using Wikidata for Enhancing Compositionality in Pretrained Language Models. In Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing, pages 170–178, Varna, Bulgaria. INCOMA Ltd., Shoumen, Bulgaria.