@inproceedings{romero-razniewski-2022-children,
    title = "Do Children Texts Hold The Key To Commonsense Knowledge?",
    author = "Romero, Julien and
      Razniewski, Simon",
    editor = "Goldberg, Yoav and
      Kozareva, Zornitsa and
      Zhang, Yue",
    booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing",
    month = dec,
    year = "2022",
    address = "Abu Dhabi, United Arab Emirates",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2022.emnlp-main.752",
    doi = "10.18653/v1/2022.emnlp-main.752",
    pages = "10954--10959",
    abstract = "Compiling comprehensive repositories of commonsense knowledge is a long-standing problem in AI. Many concerns revolve around the issue of reporting bias, i.e., that frequency in text sources is not a good proxy for relevance or truth. This paper explores whether children{'}s texts hold the key to commonsense knowledge compilation, based on the hypothesis that such content makes fewer assumptions on the reader{'}s knowledge, and therefore spells out commonsense more explicitly. An analysis with several corpora shows that children{'}s texts indeed contain much more, and more typical commonsense assertions. Moreover, experiments show that this advantage can be leveraged in popular language-model-based commonsense knowledge extraction settings, where task-unspecific fine-tuning on small amounts of children texts (childBERT) already yields significant improvements. This provides a refreshing perspective different from the common trend of deriving progress from ever larger models and corpora.",
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="romero-razniewski-2022-children">
    <titleInfo>
        <title>Do Children Texts Hold The Key To Commonsense Knowledge?</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Julien</namePart>
        <namePart type="family">Romero</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Simon</namePart>
        <namePart type="family">Razniewski</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2022-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Yoav</namePart>
            <namePart type="family">Goldberg</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Zornitsa</namePart>
            <namePart type="family">Kozareva</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Yue</namePart>
            <namePart type="family">Zhang</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Compiling comprehensive repositories of commonsense knowledge is a long-standing problem in AI. Many concerns revolve around the issue of reporting bias, i.e., that frequency in text sources is not a good proxy for relevance or truth. This paper explores whether children’s texts hold the key to commonsense knowledge compilation, based on the hypothesis that such content makes fewer assumptions on the reader’s knowledge, and therefore spells out commonsense more explicitly. An analysis with several corpora shows that children’s texts indeed contain much more, and more typical commonsense assertions. Moreover, experiments show that this advantage can be leveraged in popular language-model-based commonsense knowledge extraction settings, where task-unspecific fine-tuning on small amounts of children texts (childBERT) already yields significant improvements. This provides a refreshing perspective different from the common trend of deriving progress from ever larger models and corpora.</abstract>
    <identifier type="citekey">romero-razniewski-2022-children</identifier>
    <identifier type="doi">10.18653/v1/2022.emnlp-main.752</identifier>
    <location>
        <url>https://aclanthology.org/2022.emnlp-main.752</url>
    </location>
    <part>
        <date>2022-12</date>
        <extent unit="page">
            <start>10954</start>
            <end>10959</end>
        </extent>
    </part>
</mods>
</modsCollection>

%0 Conference Proceedings
%T Do Children Texts Hold The Key To Commonsense Knowledge?
%A Romero, Julien
%A Razniewski, Simon
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F romero-razniewski-2022-children
%X Compiling comprehensive repositories of commonsense knowledge is a long-standing problem in AI. Many concerns revolve around the issue of reporting bias, i.e., that frequency in text sources is not a good proxy for relevance or truth. This paper explores whether children’s texts hold the key to commonsense knowledge compilation, based on the hypothesis that such content makes fewer assumptions on the reader’s knowledge, and therefore spells out commonsense more explicitly. An analysis with several corpora shows that children’s texts indeed contain much more, and more typical commonsense assertions. Moreover, experiments show that this advantage can be leveraged in popular language-model-based commonsense knowledge extraction settings, where task-unspecific fine-tuning on small amounts of children texts (childBERT) already yields significant improvements. This provides a refreshing perspective different from the common trend of deriving progress from ever larger models and corpora.
%R 10.18653/v1/2022.emnlp-main.752
%U https://aclanthology.org/2022.emnlp-main.752
%U https://doi.org/10.18653/v1/2022.emnlp-main.752
%P 10954-10959
Markdown (Informal)
[Do Children Texts Hold The Key To Commonsense Knowledge?](https://aclanthology.org/2022.emnlp-main.752) (Romero & Razniewski, EMNLP 2022)
ACL
Julien Romero and Simon Razniewski. 2022. Do Children Texts Hold The Key To Commonsense Knowledge?. In Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, pages 10954–10959, Abu Dhabi, United Arab Emirates. Association for Computational Linguistics.
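
The technique named in the abstract — task-unspecific fine-tuning of BERT on small amounts of children's texts to obtain childBERT — amounts to continued masked-language-model (MLM) training on an in-domain corpus. Below is a minimal sketch of that recipe using Hugging Face `transformers`; the corpus file name, base checkpoint, and hyperparameters are illustrative assumptions, not the authors' reported configuration.

```python
# Sketch of the "childBERT" idea: continued, task-unspecific MLM training
# of BERT on a small children's-text corpus. File path, checkpoint, and
# hyperparameters are assumptions for illustration only.
from datasets import load_dataset
from transformers import (
    AutoModelForMaskedLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModelForMaskedLM.from_pretrained("bert-base-uncased")

# Hypothetical corpus file: one children's-book sentence or paragraph
# per line (the paper analyzes several children's corpora).
corpus = load_dataset("text", data_files={"train": "children_texts.txt"})

def tokenize(batch):
    return tokenizer(batch["text"], truncation=True, max_length=128)

train_set = corpus["train"].map(tokenize, batched=True, remove_columns=["text"])

# Plain BERT-style random masking (15%): no task-specific labels, matching
# the "task-unspecific fine-tuning" the abstract describes.
collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer, mlm=True, mlm_probability=0.15
)

trainer = Trainer(
    model=model,
    args=TrainingArguments(
        output_dir="childbert",
        num_train_epochs=3,
        per_device_train_batch_size=16,
    ),
    train_dataset=train_set,
    data_collator=collator,
)
trainer.train()
trainer.save_model("childbert")  # checkpoint for downstream extraction
```

The resulting checkpoint would then stand in for vanilla BERT in a language-model-based commonsense knowledge extraction pipeline; the paper's finding is that even small amounts of such in-domain text already yield significant improvements.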