@inproceedings{koo-etal-2024-large,
title = "Where am {I}? Large Language Models Wandering between Semantics and Structures in Long Contexts",
author = "Koo, Seonmin and
Kim, Jinsung and
Jang, YoungJoon and
Park, Chanjun and
Lim, Heuiseok",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-main.783",
doi = "10.18653/v1/2024.emnlp-main.783",
pages = "14144--14160",
abstract = "As the utilization of Large Language Models (LLMs) becomes more widespread, there is a growing demand for their ability to handle more complex and longer external knowledge across various use cases. Most existing evaluations of the open-ended question answering (ODQA) task, which necessitates the use of external knowledge, focus solely on whether the model provides the correct answer. However, even when LLMs answer correctly, they often fail to provide an obvious source for their responses. Therefore, it is necessary to jointly evaluate and verify the correctness of the answers and the appropriateness of grounded evidence in complex external contexts. To address this issue, we examine the phenomenon of discrepancies in abilities across two distinct tasks{---}QA and evidence selection{---}when performed simultaneously, from the perspective of task alignment. To verify LLMs{'} task alignment, we introduce a verification framework and resources considering both semantic relevancy and structural diversity of the given long context knowledge. Through extensive experiments and detailed analysis, we provide insights into the task misalignment between QA and evidence selection. Our code and resources will be available upon acceptance.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="koo-etal-2024-large">
<titleInfo>
<title>Where am I? Large Language Models Wandering between Semantics and Structures in Long Contexts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Seonmin</namePart>
<namePart type="family">Koo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jinsung</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">YoungJoon</namePart>
<namePart type="family">Jang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chanjun</namePart>
<namePart type="family">Park</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heuiseok</namePart>
<namePart type="family">Lim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>As the utilization of Large Language Models (LLMs) becomes more widespread, there is a growing demand for their ability to handle more complex and longer external knowledge across various use cases. Most existing evaluations of the open-ended question answering (ODQA) task, which necessitates the use of external knowledge, focus solely on whether the model provides the correct answer. However, even when LLMs answer correctly, they often fail to provide an obvious source for their responses. Therefore, it is necessary to jointly evaluate and verify the correctness of the answers and the appropriateness of grounded evidence in complex external contexts. To address this issue, we examine the phenomenon of discrepancies in abilities across two distinct tasks—QA and evidence selection—when performed simultaneously, from the perspective of task alignment. To verify LLMs’ task alignment, we introduce a verification framework and resources considering both semantic relevancy and structural diversity of the given long context knowledge. Through extensive experiments and detailed analysis, we provide insights into the task misalignment between QA and evidence selection. Our code and resources will be available upon acceptance.</abstract>
<identifier type="citekey">koo-etal-2024-large</identifier>
<identifier type="doi">10.18653/v1/2024.emnlp-main.783</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.783</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>14144</start>
<end>14160</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Where am I? Large Language Models Wandering between Semantics and Structures in Long Contexts
%A Koo, Seonmin
%A Kim, Jinsung
%A Jang, YoungJoon
%A Park, Chanjun
%A Lim, Heuiseok
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F koo-etal-2024-large
%X As the utilization of Large Language Models (LLMs) becomes more widespread, there is a growing demand for their ability to handle more complex and longer external knowledge across various use cases. Most existing evaluations of the open-ended question answering (ODQA) task, which necessitates the use of external knowledge, focus solely on whether the model provides the correct answer. However, even when LLMs answer correctly, they often fail to provide an obvious source for their responses. Therefore, it is necessary to jointly evaluate and verify the correctness of the answers and the appropriateness of grounded evidence in complex external contexts. To address this issue, we examine the phenomenon of discrepancies in abilities across two distinct tasks—QA and evidence selection—when performed simultaneously, from the perspective of task alignment. To verify LLMs’ task alignment, we introduce a verification framework and resources considering both semantic relevancy and structural diversity of the given long context knowledge. Through extensive experiments and detailed analysis, we provide insights into the task misalignment between QA and evidence selection. Our code and resources will be available upon acceptance.
%R 10.18653/v1/2024.emnlp-main.783
%U https://aclanthology.org/2024.emnlp-main.783
%U https://doi.org/10.18653/v1/2024.emnlp-main.783
%P 14144-14160
Markdown (Informal)
[Where am I? Large Language Models Wandering between Semantics and Structures in Long Contexts](https://aclanthology.org/2024.emnlp-main.783) (Koo et al., EMNLP 2024)
ACL