@inproceedings{lee-etal-2023-linguistic,
title = "Linguistic Properties of Truthful Response",
author = "Lee, Bruce W. and
Arockiaraj, Benedict Florance and
Jin, Helen",
editor = "Ovalle, Anaelia and
Chang, Kai-Wei and
Mehrabi, Ninareh and
Pruksachatkun, Yada and
Galystan, Aram and
Dhamala, Jwala and
Verma, Apurv and
Cao, Trista and
Kumar, Anoop and
Gupta, Rahul",
booktitle = "Proceedings of the 3rd Workshop on Trustworthy Natural Language Processing (TrustNLP 2023)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.trustnlp-1.12",
doi = "10.18653/v1/2023.trustnlp-1.12",
pages = "135--140",
abstract = "We investigate the phenomenon of an LLM{'}s untruthful response using a large set of 220 handcrafted linguistic features. We focus on GPT-3 models and find that the linguistic profiles of responses are similar across model sizes. That is, how varying-sized LLMs respond to given prompts stays similar on the linguistic properties level. We expand upon this finding by training support vector machines that rely only upon the stylistic components of model responses to classify the truthfulness of statements. Though the dataset size limits our current findings, we present promising evidence that truthfulness detection is possible without evaluating the content itself. We release our code and raw data.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lee-etal-2023-linguistic">
<titleInfo>
<title>Linguistic Properties of Truthful Response</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bruce</namePart>
<namePart type="given">W</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benedict</namePart>
<namePart type="given">Florance</namePart>
<namePart type="family">Arockiaraj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helen</namePart>
<namePart type="family">Jin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 3rd Workshop on Trustworthy Natural Language Processing (TrustNLP 2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anaelia</namePart>
<namePart type="family">Ovalle</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kai-Wei</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ninareh</namePart>
<namePart type="family">Mehrabi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yada</namePart>
<namePart type="family">Pruksachatkun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aram</namePart>
<namePart type="family">Galystan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jwala</namePart>
<namePart type="family">Dhamala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Apurv</namePart>
<namePart type="family">Verma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Trista</namePart>
<namePart type="family">Cao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anoop</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rahul</namePart>
<namePart type="family">Gupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We investigate the phenomenon of an LLM’s untruthful response using a large set of 220 handcrafted linguistic features. We focus on GPT-3 models and find that the linguistic profiles of responses are similar across model sizes. That is, how varying-sized LLMs respond to given prompts stays similar on the linguistic properties level. We expand upon this finding by training support vector machines that rely only upon the stylistic components of model responses to classify the truthfulness of statements. Though the dataset size limits our current findings, we present promising evidence that truthfulness detection is possible without evaluating the content itself. We release our code and raw data.</abstract>
<identifier type="citekey">lee-etal-2023-linguistic</identifier>
<identifier type="doi">10.18653/v1/2023.trustnlp-1.12</identifier>
<location>
<url>https://aclanthology.org/2023.trustnlp-1.12</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>135</start>
<end>140</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Linguistic Properties of Truthful Response
%A Lee, Bruce W.
%A Arockiaraj, Benedict Florance
%A Jin, Helen
%Y Ovalle, Anaelia
%Y Chang, Kai-Wei
%Y Mehrabi, Ninareh
%Y Pruksachatkun, Yada
%Y Galystan, Aram
%Y Dhamala, Jwala
%Y Verma, Apurv
%Y Cao, Trista
%Y Kumar, Anoop
%Y Gupta, Rahul
%S Proceedings of the 3rd Workshop on Trustworthy Natural Language Processing (TrustNLP 2023)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F lee-etal-2023-linguistic
%X We investigate the phenomenon of an LLM’s untruthful response using a large set of 220 handcrafted linguistic features. We focus on GPT-3 models and find that the linguistic profiles of responses are similar across model sizes. That is, how varying-sized LLMs respond to given prompts stays similar on the linguistic properties level. We expand upon this finding by training support vector machines that rely only upon the stylistic components of model responses to classify the truthfulness of statements. Though the dataset size limits our current findings, we present promising evidence that truthfulness detection is possible without evaluating the content itself. We release our code and raw data.
%R 10.18653/v1/2023.trustnlp-1.12
%U https://aclanthology.org/2023.trustnlp-1.12
%U https://doi.org/10.18653/v1/2023.trustnlp-1.12
%P 135-140
Markdown (Informal)
[Linguistic Properties of Truthful Response](https://aclanthology.org/2023.trustnlp-1.12) (Lee et al., TrustNLP 2023)
ACL
- Bruce W. Lee, Benedict Florance Arockiaraj, and Helen Jin. 2023. Linguistic Properties of Truthful Response. In Proceedings of the 3rd Workshop on Trustworthy Natural Language Processing (TrustNLP 2023), pages 135–140, Toronto, Canada. Association for Computational Linguistics.