@inproceedings{zhang-etal-2023-cikqa,
title = "{CIKQA}: Learning Commonsense Inference with a Unified Knowledge-in-the-loop {QA} Paradigm",
author = "Zhang, Hongming and
Huo, Yintong and
Elazar, Yanai and
Song, Yangqiu and
Goldberg, Yoav and
Roth, Dan",
editor = "Vlachos, Andreas and
Augenstein, Isabelle",
booktitle = "Findings of the Association for Computational Linguistics: EACL 2023",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-eacl.8",
doi = "10.18653/v1/2023.findings-eacl.8",
pages = "114--124",
abstract = "We propose a new commonsense reasoning benchmark to motivate commonsense reasoning progress from two perspectives: (1) Evaluating whether models can distinguish knowledge quality by predicting if the knowledge is enough to answer the question; (2) Evaluating whether models can develop commonsense inference capabilities that generalize across tasks. We first extract supporting knowledge for each question and ask humans to annotate whether the auto-extracted knowledge is enough to answer the question or not. After that, we convert different tasks into a unified question-answering format to evaluate the models{'} generalization capabilities. We name the benchmark Commonsense Inference with Knowledge-in-the-loop Question Answering ({\textbackslash}name). Experiments show that with our learning paradigm, models demonstrate encouraging generalization capabilities. At the same time, we also notice that distinguishing knowledge quality remains challenging for current commonsense reasoning models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-etal-2023-cikqa">
  <titleInfo>
    <title>CIKQA: Learning Commonsense Inference with a Unified Knowledge-in-the-loop QA Paradigm</title>
  </titleInfo>
  <name type="personal">
    <namePart type="given">Hongming</namePart>
    <namePart type="family">Zhang</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Yintong</namePart>
    <namePart type="family">Huo</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Yanai</namePart>
    <namePart type="family">Elazar</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Yangqiu</namePart>
    <namePart type="family">Song</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Yoav</namePart>
    <namePart type="family">Goldberg</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Dan</namePart>
    <namePart type="family">Roth</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <originInfo>
    <dateIssued>2023-05</dateIssued>
  </originInfo>
  <typeOfResource>text</typeOfResource>
  <relatedItem type="host">
    <titleInfo>
      <title>Findings of the Association for Computational Linguistics: EACL 2023</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Andreas</namePart>
      <namePart type="family">Vlachos</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Isabelle</namePart>
      <namePart type="family">Augenstein</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <originInfo>
      <publisher>Association for Computational Linguistics</publisher>
      <place>
        <placeTerm type="text">Dubrovnik, Croatia</placeTerm>
      </place>
    </originInfo>
    <genre authority="marcgt">conference publication</genre>
  </relatedItem>
  <abstract>We propose a new commonsense reasoning benchmark to motivate commonsense reasoning progress from two perspectives: (1) Evaluating whether models can distinguish knowledge quality by predicting if the knowledge is enough to answer the question; (2) Evaluating whether models can develop commonsense inference capabilities that generalize across tasks. We first extract supporting knowledge for each question and ask humans to annotate whether the auto-extracted knowledge is enough to answer the question or not. After that, we convert different tasks into a unified question-answering format to evaluate the models’ generalization capabilities. We name the benchmark Commonsense Inference with Knowledge-in-the-loop Question Answering (CIKQA). Experiments show that with our learning paradigm, models demonstrate encouraging generalization capabilities. At the same time, we also notice that distinguishing knowledge quality remains challenging for current commonsense reasoning models.</abstract>
  <identifier type="citekey">zhang-etal-2023-cikqa</identifier>
  <identifier type="doi">10.18653/v1/2023.findings-eacl.8</identifier>
  <location>
    <url>https://aclanthology.org/2023.findings-eacl.8</url>
  </location>
  <part>
    <date>2023-05</date>
    <extent unit="page">
      <start>114</start>
      <end>124</end>
    </extent>
  </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CIKQA: Learning Commonsense Inference with a Unified Knowledge-in-the-loop QA Paradigm
%A Zhang, Hongming
%A Huo, Yintong
%A Elazar, Yanai
%A Song, Yangqiu
%A Goldberg, Yoav
%A Roth, Dan
%Y Vlachos, Andreas
%Y Augenstein, Isabelle
%S Findings of the Association for Computational Linguistics: EACL 2023
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F zhang-etal-2023-cikqa
%X We propose a new commonsense reasoning benchmark to motivate commonsense reasoning progress from two perspectives: (1) Evaluating whether models can distinguish knowledge quality by predicting if the knowledge is enough to answer the question; (2) Evaluating whether models can develop commonsense inference capabilities that generalize across tasks. We first extract supporting knowledge for each question and ask humans to annotate whether the auto-extracted knowledge is enough to answer the question or not. After that, we convert different tasks into a unified question-answering format to evaluate the models’ generalization capabilities. We name the benchmark Commonsense Inference with Knowledge-in-the-loop Question Answering (CIKQA). Experiments show that with our learning paradigm, models demonstrate encouraging generalization capabilities. At the same time, we also notice that distinguishing knowledge quality remains challenging for current commonsense reasoning models.
%R 10.18653/v1/2023.findings-eacl.8
%U https://aclanthology.org/2023.findings-eacl.8
%U https://doi.org/10.18653/v1/2023.findings-eacl.8
%P 114-124
Markdown (Informal)
[CIKQA: Learning Commonsense Inference with a Unified Knowledge-in-the-loop QA Paradigm](https://aclanthology.org/2023.findings-eacl.8) (Zhang et al., Findings 2023)
ACL
Hongming Zhang, Yintong Huo, Yanai Elazar, Yangqiu Song, Yoav Goldberg, and Dan Roth. 2023. CIKQA: Learning Commonsense Inference with a Unified Knowledge-in-the-loop QA Paradigm. In Findings of the Association for Computational Linguistics: EACL 2023, pages 114–124, Dubrovnik, Croatia. Association for Computational Linguistics.