@inproceedings{zhuang-kennington-2024-understanding,
title = "Understanding Survey Paper Taxonomy about Large Language Models via Graph Representation Learning",
author = "Zhuang, Jun and
Kennington, Casey",
editor = "Ghosal, Tirthankar and
Singh, Amanpreet and
Waard, Anita and
Mayr, Philipp and
Naik, Aakanksha and
Weller, Orion and
Lee, Yoonjoo and
Shen, Shannon and
Qin, Yanxia",
booktitle = "Proceedings of the Fourth Workshop on Scholarly Document Processing (SDP 2024)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.sdp-1.6",
pages = "58--69",
abstract = "As new research on Large Language Models (LLMs) continues, it is difficult to keep up with new research and models. To help researchers synthesize the new research many have written survey papers, but even those have become numerous. In this paper, we develop a method to automatically assign survey papers to a taxonomy. We collect the metadata of 144 LLM survey papers and explore three paradigms to classify papers within the taxonomy. Our work indicates that leveraging graph structure information on co-category graphs can significantly outperform the language models in two paradigms; pre-trained language models{'} fine-tuning and zero-shot/few-shot classifications using LLMs. We find that our model surpasses an average human recognition level and that fine-tuning LLMs using weak labels generated by a smaller model, such as the GCN in this study, can be more effective than using ground-truth labels, revealing the potential of weak-to-strong generalization in the taxonomy classification task.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhuang-kennington-2024-understanding">
<titleInfo>
<title>Understanding Survey Paper Taxonomy about Large Language Models via Graph Representation Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jun</namePart>
<namePart type="family">Zhuang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Casey</namePart>
<namePart type="family">Kennington</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on Scholarly Document Processing (SDP 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tirthankar</namePart>
<namePart type="family">Ghosal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amanpreet</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anita</namePart>
<namePart type="family">Waard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Mayr</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aakanksha</namePart>
<namePart type="family">Naik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Orion</namePart>
<namePart type="family">Weller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yoonjoo</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shannon</namePart>
<namePart type="family">Shen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yanxia</namePart>
<namePart type="family">Qin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>As new research on Large Language Models (LLMs) continues, it is difficult to keep up with new research and models. To help researchers synthesize the new research many have written survey papers, but even those have become numerous. In this paper, we develop a method to automatically assign survey papers to a taxonomy. We collect the metadata of 144 LLM survey papers and explore three paradigms to classify papers within the taxonomy. Our work indicates that leveraging graph structure information on co-category graphs can significantly outperform the language models in two paradigms; pre-trained language models’ fine-tuning and zero-shot/few-shot classifications using LLMs. We find that our model surpasses an average human recognition level and that fine-tuning LLMs using weak labels generated by a smaller model, such as the GCN in this study, can be more effective than using ground-truth labels, revealing the potential of weak-to-strong generalization in the taxonomy classification task.</abstract>
<identifier type="citekey">zhuang-kennington-2024-understanding</identifier>
<location>
<url>https://aclanthology.org/2024.sdp-1.6</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>58</start>
<end>69</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Understanding Survey Paper Taxonomy about Large Language Models via Graph Representation Learning
%A Zhuang, Jun
%A Kennington, Casey
%Y Ghosal, Tirthankar
%Y Singh, Amanpreet
%Y Waard, Anita
%Y Mayr, Philipp
%Y Naik, Aakanksha
%Y Weller, Orion
%Y Lee, Yoonjoo
%Y Shen, Shannon
%Y Qin, Yanxia
%S Proceedings of the Fourth Workshop on Scholarly Document Processing (SDP 2024)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F zhuang-kennington-2024-understanding
%X As new research on Large Language Models (LLMs) continues, it is difficult to keep up with new research and models. To help researchers synthesize the new research many have written survey papers, but even those have become numerous. In this paper, we develop a method to automatically assign survey papers to a taxonomy. We collect the metadata of 144 LLM survey papers and explore three paradigms to classify papers within the taxonomy. Our work indicates that leveraging graph structure information on co-category graphs can significantly outperform the language models in two paradigms; pre-trained language models’ fine-tuning and zero-shot/few-shot classifications using LLMs. We find that our model surpasses an average human recognition level and that fine-tuning LLMs using weak labels generated by a smaller model, such as the GCN in this study, can be more effective than using ground-truth labels, revealing the potential of weak-to-strong generalization in the taxonomy classification task.
%U https://aclanthology.org/2024.sdp-1.6
%P 58-69
Markdown (Informal)
[Understanding Survey Paper Taxonomy about Large Language Models via Graph Representation Learning](https://aclanthology.org/2024.sdp-1.6) (Zhuang & Kennington, sdp-WS 2024)
ACL