@article{pikuliak-2024-using,
    title = "On Using Self-Report Studies to Analyze Language Models",
    author = "Pikuliak, Mat{\'u}{\v{s}}",
    editor = "Bollmann, Marcel",
    journal = "Northern European Journal of Language Technology",
    volume = "10",
    month = dec,
    year = "2024",
    address = {Link{\"o}ping, Sweden},
    publisher = {Link{\"o}ping University Electronic Press},
    url = "https://aclanthology.org/2024.nejlt-1.5/",
    doi = "10.3384/nejlt.2000-1533.2024.5000",
    pages = "78--85",
    abstract = "We are at a curious point in time where our ability to build language models (LMs) has outpaced our ability to analyze them. We do not really know how to reliably determine their capabilities, biases, dangers, knowledge, and so on. The benchmarks we have are often overly specific, do not generalize well, and are susceptible to data leakage. Recently, I have noticed a trend of using self-report studies, such as various polls and questionnaires originally designed for humans, to analyze the properties of LMs. I think that this approach can easily lead to false results, which can be quite dangerous considering the current discussions on AI safety, governance, and regulation. To illustrate my point, I will delve deeper into several papers that employ self-report methodologies and I will try to highlight some of their weaknesses."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pikuliak-2024-using">
    <titleInfo>
        <title>On Using Self-Report Studies to Analyze Language Models</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Matúš</namePart>
        <namePart type="family">Pikuliak</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2024-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <genre authority="bibutilsgt">journal article</genre>
    <relatedItem type="host">
        <titleInfo>
            <title>Northern European Journal of Language Technology</title>
        </titleInfo>
        <originInfo>
            <issuance>continuing</issuance>
            <publisher>Linköping University Electronic Press</publisher>
            <place>
                <placeTerm type="text">Linköping, Sweden</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">periodical</genre>
        <genre authority="bibutilsgt">academic journal</genre>
    </relatedItem>
    <abstract>We are at a curious point in time where our ability to build language models (LMs) has outpaced our ability to analyze them. We do not really know how to reliably determine their capabilities, biases, dangers, knowledge, and so on. The benchmarks we have are often overly specific, do not generalize well, and are susceptible to data leakage. Recently, I have noticed a trend of using self-report studies, such as various polls and questionnaires originally designed for humans, to analyze the properties of LMs. I think that this approach can easily lead to false results, which can be quite dangerous considering the current discussions on AI safety, governance, and regulation. To illustrate my point, I will delve deeper into several papers that employ self-report methodologies and I will try to highlight some of their weaknesses.</abstract>
    <identifier type="citekey">pikuliak-2024-using</identifier>
    <identifier type="doi">10.3384/nejlt.2000-1533.2024.5000</identifier>
    <location>
        <url>https://aclanthology.org/2024.nejlt-1.5/</url>
    </location>
    <part>
        <date>2024-12</date>
        <detail type="volume"><number>10</number></detail>
        <extent unit="page">
            <start>78</start>
            <end>85</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Journal Article
%T On Using Self-Report Studies to Analyze Language Models
%A Pikuliak, Matúš
%J Northern European Journal of Language Technology
%D 2024
%8 December
%V 10
%I Linköping University Electronic Press
%C Linköping, Sweden
%F pikuliak-2024-using
%X We are at a curious point in time where our ability to build language models (LMs) has outpaced our ability to analyze them. We do not really know how to reliably determine their capabilities, biases, dangers, knowledge, and so on. The benchmarks we have are often overly specific, do not generalize well, and are susceptible to data leakage. Recently, I have noticed a trend of using self-report studies, such as various polls and questionnaires originally designed for humans, to analyze the properties of LMs. I think that this approach can easily lead to false results, which can be quite dangerous considering the current discussions on AI safety, governance, and regulation. To illustrate my point, I will delve deeper into several papers that employ self-report methodologies and I will try to highlight some of their weaknesses.
%R 10.3384/nejlt.2000-1533.2024.5000
%U https://aclanthology.org/2024.nejlt-1.5/
%U https://doi.org/10.3384/nejlt.2000-1533.2024.5000
%P 78-85
Markdown (Informal)
[On Using Self-Report Studies to Analyze Language Models](https://aclanthology.org/2024.nejlt-1.5/) (Pikuliak, NEJLT 2024)
ACL
Matúš Pikuliak. 2024. On Using Self-Report Studies to Analyze Language Models. Northern European Journal of Language Technology, 10:78–85.