% Conference paper in the ICON 2023 proceedings (ACL Anthology record 2023.icon-1.20).
% NOTE(review): pages 202--294 would make this a 93-page paper -- confirm the end page against the proceedings.
@inproceedings{ankush-etal-2023-kitlm,
  title = "{KITLM}: Domain-Specific Knowledge {InTegration} into Language Models for Question Answering",
  author = "Agarwal, Ankush and
    Gawade, Sakharam and
    Azad, Amar Prakash and
    Bhattacharyya, Pushpak",
  editor = "Pawar, Jyoti D. and
    Lalitha Devi, Sobha",
  booktitle = "Proceedings of the 20th International Conference on Natural Language Processing (ICON)",
  month = dec,
  year = "2023",
  address = "Goa University, Goa, India",
  publisher = "NLP Association of India (NLPAI)",
  url = "https://aclanthology.org/2023.icon-1.20",
  pages = "202--294",
  abstract = "Large language models (LLMs) have demonstrated remarkable performance in a wide range of natural language tasks. However, as these models continue to grow in size, they face significant challenges in terms of computational costs. Additionally, LLMs often lack efficient domain-specific understanding, which is particularly crucial in specialized fields such as aviation and healthcare. To boost the domain-specific understanding, we propose, KITLM, a novel knowledge base integration approach into language model through relevant information infusion. By integrating pertinent knowledge, not only the performance of the language model is greatly enhanced, but the model size requirement is also significantly reduced while achieving comparable performance. Our proposed knowledge-infused model surpasses the performance of both GPT-3.5-turbo and the state-of-the-art knowledge infusion method, SKILL, achieving over 1.5 times improvement in exact match scores on the MetaQA. KITLM showed a similar performance boost in the aviation domain with AeroQA. The drastic performance improvement of KITLM over the existing methods can be attributed to the infusion of relevant knowledge while mitigating noise. In addition, we release two curated datasets to accelerate knowledge infusion research in specialized fields: a) AeroQA, a new benchmark dataset designed for multi-hop question-answering within the aviation domain, and b) Aviation Corpus, a dataset constructed from unstructured text extracted from the National Transportation Safety Board reports. Our research contributes to advancing the field of domain-specific language understanding and showcases the potential of knowledge infusion techniques in improving the performance.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for a conference paper; the enclosing proceedings volume is described in the relatedItem of type "host". -->
  <mods ID="ankush-etal-2023-kitlm">
    <titleInfo>
      <title>KITLM: Domain-Specific Knowledge InTegration into Language Models for Question Answering</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Ankush</namePart>
      <namePart type="family">Agarwal</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sakharam</namePart>
      <namePart type="family">Gawade</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Amar</namePart>
      <namePart type="given">Prakash</namePart>
      <namePart type="family">Azad</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pushpak</namePart>
      <namePart type="family">Bhattacharyya</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 20th International Conference on Natural Language Processing (ICON)</title>
      </titleInfo>
      <!-- The middle initial is a given-name part, not part of the family name. -->
      <name type="personal">
        <namePart type="given">Jyoti</namePart>
        <namePart type="given">D.</namePart>
        <namePart type="family">Pawar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sobha</namePart>
        <namePart type="family">Lalitha Devi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>NLP Association of India (NLPAI)</publisher>
        <place>
          <placeTerm type="text">Goa University, Goa, India</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Large language models (LLMs) have demonstrated remarkable performance in a wide range of natural language tasks. However, as these models continue to grow in size, they face significant challenges in terms of computational costs. Additionally, LLMs often lack efficient domain-specific understanding, which is particularly crucial in specialized fields such as aviation and healthcare. To boost the domain-specific understanding, we propose, KITLM, a novel knowledge base integration approach into language model through relevant information infusion. By integrating pertinent knowledge, not only the performance of the language model is greatly enhanced, but the model size requirement is also significantly reduced while achieving comparable performance. Our proposed knowledge-infused model surpasses the performance of both GPT-3.5-turbo and the state-of-the-art knowledge infusion method, SKILL, achieving over 1.5 times improvement in exact match scores on the MetaQA. KITLM showed a similar performance boost in the aviation domain with AeroQA. The drastic performance improvement of KITLM over the existing methods can be attributed to the infusion of relevant knowledge while mitigating noise. In addition, we release two curated datasets to accelerate knowledge infusion research in specialized fields: a) AeroQA, a new benchmark dataset designed for multi-hop question-answering within the aviation domain, and b) Aviation Corpus, a dataset constructed from unstructured text extracted from the National Transportation Safety Board reports. Our research contributes to advancing the field of domain-specific language understanding and showcases the potential of knowledge infusion techniques in improving the performance.</abstract>
    <identifier type="citekey">ankush-etal-2023-kitlm</identifier>
    <location>
      <url>https://aclanthology.org/2023.icon-1.20</url>
    </location>
    <part>
      <date>2023-12</date>
      <!-- NOTE(review): 202 to 294 would be a 93-page paper; confirm the end page against the proceedings. -->
      <extent unit="page">
        <start>202</start>
        <end>294</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T KITLM: Domain-Specific Knowledge InTegration into Language Models for Question Answering
%A Agarwal, Ankush
%A Gawade, Sakharam
%A Azad, Amar Prakash
%A Bhattacharyya, Pushpak
%Y Pawar, Jyoti D.
%Y Lalitha Devi, Sobha
%S Proceedings of the 20th International Conference on Natural Language Processing (ICON)
%D 2023
%8 December
%I NLP Association of India (NLPAI)
%C Goa University, Goa, India
%F ankush-etal-2023-kitlm
%X Large language models (LLMs) have demonstrated remarkable performance in a wide range of natural language tasks. However, as these models continue to grow in size, they face significant challenges in terms of computational costs. Additionally, LLMs often lack efficient domain-specific understanding, which is particularly crucial in specialized fields such as aviation and healthcare. To boost the domain-specific understanding, we propose, KITLM, a novel knowledge base integration approach into language model through relevant information infusion. By integrating pertinent knowledge, not only the performance of the language model is greatly enhanced, but the model size requirement is also significantly reduced while achieving comparable performance. Our proposed knowledge-infused model surpasses the performance of both GPT-3.5-turbo and the state-of-the-art knowledge infusion method, SKILL, achieving over 1.5 times improvement in exact match scores on the MetaQA. KITLM showed a similar performance boost in the aviation domain with AeroQA. The drastic performance improvement of KITLM over the existing methods can be attributed to the infusion of relevant knowledge while mitigating noise. In addition, we release two curated datasets to accelerate knowledge infusion research in specialized fields: a) AeroQA, a new benchmark dataset designed for multi-hop question-answering within the aviation domain, and b) Aviation Corpus, a dataset constructed from unstructured text extracted from the National Transportation Safety Board reports. Our research contributes to advancing the field of domain-specific language understanding and showcases the potential of knowledge infusion techniques in improving the performance.
%U https://aclanthology.org/2023.icon-1.20
%P 202-294
Markdown (Informal)
[KITLM: Domain-Specific Knowledge InTegration into Language Models for Question Answering](https://aclanthology.org/2023.icon-1.20) (Agarwal et al., ICON 2023)
ACL