@inproceedings{kweon-etal-2025-topic,
title = "Topic Coverage-based Demonstration Retrieval for In-Context Learning",
author = "Kweon, Wonbin and
Kang, SeongKu and
Tian, Runchu and
Jiang, Pengcheng and
Han, Jiawei and
Yu, Hwanjo",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.emnlp-main.1007/",
pages = "19911--19923",
ISBN = "979-8-89176-332-6",
abstract = "The effectiveness of in-context learning relies heavily on selecting demonstrations that provide all the necessary information for a given test input.To achieve this, it is crucial to identify and cover fine-grained knowledge requirements. However, prior methods often retrieve demonstrations based solely on embedding similarity or generation probability, resulting in irrelevant or redundant examples.In this paper, we propose TopicK, a topic coverage-based retrieval framework that selects demonstrations to comprehensively cover topic-level knowledge relevant to both the test input and the model.Specifically, TopicK estimates the topics required by the input and assesses the model{'}s knowledge on those topics.TopicK then iteratively selects demonstrations that introduce previously uncovered required topics, in which the model exhibits low topical knowledge.We validate the effectiveness of TopicK through extensive experiments across various datasets and both open- and closed-source LLMs.Our source code is available at https://github.com/WonbinKweon/TopicK{\_}EMNLP2025."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kweon-etal-2025-topic">
<titleInfo>
<title>Topic Coverage-based Demonstration Retrieval for In-Context Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wonbin</namePart>
<namePart type="family">Kweon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">SeongKu</namePart>
<namePart type="family">Kang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Runchu</namePart>
<namePart type="family">Tian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pengcheng</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiawei</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hwanjo</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-332-6</identifier>
</relatedItem>
<abstract>The effectiveness of in-context learning relies heavily on selecting demonstrations that provide all the necessary information for a given test input.To achieve this, it is crucial to identify and cover fine-grained knowledge requirements. However, prior methods often retrieve demonstrations based solely on embedding similarity or generation probability, resulting in irrelevant or redundant examples.In this paper, we propose TopicK, a topic coverage-based retrieval framework that selects demonstrations to comprehensively cover topic-level knowledge relevant to both the test input and the model.Specifically, TopicK estimates the topics required by the input and assesses the model’s knowledge on those topics.TopicK then iteratively selects demonstrations that introduce previously uncovered required topics, in which the model exhibits low topical knowledge.We validate the effectiveness of TopicK through extensive experiments across various datasets and both open- and closed-source LLMs.Our source code is available at https://github.com/WonbinKweon/TopicK_EMNLP2025.</abstract>
<identifier type="citekey">kweon-etal-2025-topic</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-main.1007/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>19911</start>
<end>19923</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Topic Coverage-based Demonstration Retrieval for In-Context Learning
%A Kweon, Wonbin
%A Kang, SeongKu
%A Tian, Runchu
%A Jiang, Pengcheng
%A Han, Jiawei
%A Yu, Hwanjo
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-332-6
%F kweon-etal-2025-topic
%X The effectiveness of in-context learning relies heavily on selecting demonstrations that provide all the necessary information for a given test input.To achieve this, it is crucial to identify and cover fine-grained knowledge requirements. However, prior methods often retrieve demonstrations based solely on embedding similarity or generation probability, resulting in irrelevant or redundant examples.In this paper, we propose TopicK, a topic coverage-based retrieval framework that selects demonstrations to comprehensively cover topic-level knowledge relevant to both the test input and the model.Specifically, TopicK estimates the topics required by the input and assesses the model’s knowledge on those topics.TopicK then iteratively selects demonstrations that introduce previously uncovered required topics, in which the model exhibits low topical knowledge.We validate the effectiveness of TopicK through extensive experiments across various datasets and both open- and closed-source LLMs.Our source code is available at https://github.com/WonbinKweon/TopicK_EMNLP2025.
%U https://aclanthology.org/2025.emnlp-main.1007/
%P 19911-19923
Markdown (Informal)
[Topic Coverage-based Demonstration Retrieval for In-Context Learning](https://aclanthology.org/2025.emnlp-main.1007/) (Kweon et al., EMNLP 2025)
ACL