@inproceedings{yao-etal-2022-wordties,
title = "{W}ord{T}ies: Measuring Word Associations in Language Models via Constrained Sampling",
author = "Yao, Peiran and
Renwick, Tobias and
Barbosa, Denilson",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2022",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-emnlp.440",
doi = "10.18653/v1/2022.findings-emnlp.440",
pages = "5959--5970",
abstract = "Word associations are widely used in psychology to provide insights on how humans perceive and understand concepts. Comparing word associations in language models (LMs) to those generated by human subjects can serve as a proxy to uncover embedded lexical and commonsense knowledge in language models. While much helpful work has been done applying direct metrics, such as cosine similarity, to help understand latent spaces, these metrics are symmetric, while human word associativity is asymmetric. We propose WordTies, an algorithm based on constrained sampling from LMs, which allows an asymmetric measurement of associated words, given a cue word as the input. Comparing to existing methods, word associations found by this method share more overlap with associations provided by humans, and observe the asymmetric property of human associations. To examine possible reasons behind associations, we analyze the knowledge and reasoning behind the word pairings as they are linked to lexical and commonsense knowledge graphs.When the knowledge about the nature of the word pairings is combined with a probability that the LM has learned that information, we have a new way to examine what information is captured in LMs.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yao-etal-2022-wordties">
<titleInfo>
<title>WordTies: Measuring Word Associations in Language Models via Constrained Sampling</title>
</titleInfo>
<name type="personal">
<namePart type="given">Peiran</namePart>
<namePart type="family">Yao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tobias</namePart>
<namePart type="family">Renwick</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Denilson</namePart>
<namePart type="family">Barbosa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Word associations are widely used in psychology to provide insights on how humans perceive and understand concepts. Comparing word associations in language models (LMs) to those generated by human subjects can serve as a proxy to uncover embedded lexical and commonsense knowledge in language models. While much helpful work has been done applying direct metrics, such as cosine similarity, to help understand latent spaces, these metrics are symmetric, while human word associativity is asymmetric. We propose WordTies, an algorithm based on constrained sampling from LMs, which allows an asymmetric measurement of associated words, given a cue word as the input. Comparing to existing methods, word associations found by this method share more overlap with associations provided by humans, and observe the asymmetric property of human associations. To examine possible reasons behind associations, we analyze the knowledge and reasoning behind the word pairings as they are linked to lexical and commonsense knowledge graphs.When the knowledge about the nature of the word pairings is combined with a probability that the LM has learned that information, we have a new way to examine what information is captured in LMs.</abstract>
<identifier type="citekey">yao-etal-2022-wordties</identifier>
<identifier type="doi">10.18653/v1/2022.findings-emnlp.440</identifier>
<location>
<url>https://aclanthology.org/2022.findings-emnlp.440</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>5959</start>
<end>5970</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T WordTies: Measuring Word Associations in Language Models via Constrained Sampling
%A Yao, Peiran
%A Renwick, Tobias
%A Barbosa, Denilson
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Findings of the Association for Computational Linguistics: EMNLP 2022
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F yao-etal-2022-wordties
%X Word associations are widely used in psychology to provide insights on how humans perceive and understand concepts. Comparing word associations in language models (LMs) to those generated by human subjects can serve as a proxy to uncover embedded lexical and commonsense knowledge in language models. While much helpful work has been done applying direct metrics, such as cosine similarity, to help understand latent spaces, these metrics are symmetric, while human word associativity is asymmetric. We propose WordTies, an algorithm based on constrained sampling from LMs, which allows an asymmetric measurement of associated words, given a cue word as the input. Comparing to existing methods, word associations found by this method share more overlap with associations provided by humans, and observe the asymmetric property of human associations. To examine possible reasons behind associations, we analyze the knowledge and reasoning behind the word pairings as they are linked to lexical and commonsense knowledge graphs.When the knowledge about the nature of the word pairings is combined with a probability that the LM has learned that information, we have a new way to examine what information is captured in LMs.
%R 10.18653/v1/2022.findings-emnlp.440
%U https://aclanthology.org/2022.findings-emnlp.440
%U https://doi.org/10.18653/v1/2022.findings-emnlp.440
%P 5959-5970
Markdown (Informal)
[WordTies: Measuring Word Associations in Language Models via Constrained Sampling](https://aclanthology.org/2022.findings-emnlp.440) (Yao et al., Findings 2022)
ACL