BibTeX
@inproceedings{hagstrom-etal-2023-effect,
    title = "The Effect of Scaling, Retrieval Augmentation and Form on the Factual Consistency of Language Models",
    author = {Hagstr{\"o}m, Lovisa and
      Saynova, Denitsa and
      Norlund, Tobias and
      Johansson, Moa and
      Johansson, Richard},
    editor = "Bouamor, Houda and
      Pino, Juan and
      Bali, Kalika",
    booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing",
    month = dec,
    year = "2023",
    address = "Singapore",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2023.emnlp-main.332",
    doi = "10.18653/v1/2023.emnlp-main.332",
    pages = "5457--5476",
    abstract = "Large Language Models (LLMs) make natural interfaces to factual knowledge, but their usefulness is limited by their tendency to deliver inconsistent answers to semantically equivalent questions. For example, a model might supply the answer {``}Edinburgh{''} to {``}Anne Redpath passed away in X.{''} and {``}London{''} to {``}Anne Redpath{'}s life ended in X.{''} In this work, we identify potential causes of inconsistency and evaluate the effectiveness of two mitigation strategies: up-scaling and augmenting the LM with a passage retrieval database. Our results on the LLaMA and Atlas models show that both strategies reduce inconsistency but that retrieval augmentation is considerably more efficient. We further consider and disentangle the consistency contributions of different components of Atlas. For all LMs evaluated we find that syntactical form and task artifacts impact consistency. Taken together, our results provide a better understanding of the factors affecting the factual consistency of language models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hagstrom-etal-2023-effect">
<titleInfo>
<title>The Effect of Scaling, Retrieval Augmentation and Form on the Factual Consistency of Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lovisa</namePart>
<namePart type="family">Hagström</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Denitsa</namePart>
<namePart type="family">Saynova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tobias</namePart>
<namePart type="family">Norlund</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Moa</namePart>
<namePart type="family">Johansson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="family">Johansson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large Language Models (LLMs) make natural interfaces to factual knowledge, but their usefulness is limited by their tendency to deliver inconsistent answers to semantically equivalent questions. For example, a model might supply the answer “Edinburgh” to “Anne Redpath passed away in X.” and “London” to “Anne Redpath’s life ended in X.” In this work, we identify potential causes of inconsistency and evaluate the effectiveness of two mitigation strategies: up-scaling and augmenting the LM with a passage retrieval database. Our results on the LLaMA and Atlas models show that both strategies reduce inconsistency but that retrieval augmentation is considerably more efficient. We further consider and disentangle the consistency contributions of different components of Atlas. For all LMs evaluated we find that syntactical form and task artifacts impact consistency. Taken together, our results provide a better understanding of the factors affecting the factual consistency of language models.</abstract>
<identifier type="citekey">hagstrom-etal-2023-effect</identifier>
<identifier type="doi">10.18653/v1/2023.emnlp-main.332</identifier>
<location>
<url>https://aclanthology.org/2023.emnlp-main.332</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>5457</start>
<end>5476</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T The Effect of Scaling, Retrieval Augmentation and Form on the Factual Consistency of Language Models
%A Hagström, Lovisa
%A Saynova, Denitsa
%A Norlund, Tobias
%A Johansson, Moa
%A Johansson, Richard
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F hagstrom-etal-2023-effect
%X Large Language Models (LLMs) make natural interfaces to factual knowledge, but their usefulness is limited by their tendency to deliver inconsistent answers to semantically equivalent questions. For example, a model might supply the answer “Edinburgh” to “Anne Redpath passed away in X.” and “London” to “Anne Redpath’s life ended in X.” In this work, we identify potential causes of inconsistency and evaluate the effectiveness of two mitigation strategies: up-scaling and augmenting the LM with a passage retrieval database. Our results on the LLaMA and Atlas models show that both strategies reduce inconsistency but that retrieval augmentation is considerably more efficient. We further consider and disentangle the consistency contributions of different components of Atlas. For all LMs evaluated we find that syntactical form and task artifacts impact consistency. Taken together, our results provide a better understanding of the factors affecting the factual consistency of language models.
%R 10.18653/v1/2023.emnlp-main.332
%U https://aclanthology.org/2023.emnlp-main.332
%U https://doi.org/10.18653/v1/2023.emnlp-main.332
%P 5457-5476
Markdown (Informal)
[The Effect of Scaling, Retrieval Augmentation and Form on the Factual Consistency of Language Models](https://aclanthology.org/2023.emnlp-main.332) (Hagström et al., EMNLP 2023)
ACL
Lovisa Hagström, Denitsa Saynova, Tobias Norlund, Moa Johansson, and Richard Johansson. 2023. The Effect of Scaling, Retrieval Augmentation and Form on the Factual Consistency of Language Models. In Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, pages 5457–5476, Singapore. Association for Computational Linguistics.