@inproceedings{durward-thomson-2024-evaluating,
title = "Evaluating Vocabulary Usage in {LLM}s",
author = "Durward, Matthew and
Thomson, Christopher",
editor = {Kochmar, Ekaterina and
Bexte, Marie and
Burstein, Jill and
Horbach, Andrea and
Laarmann-Quante, Ronja and
Tack, Ana{\"\i}s and
Yaneva, Victoria and
Yuan, Zheng},
booktitle = "Proceedings of the 19th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.bea-1.22",
pages = "266--282",
abstract = "The paper focuses on investigating vocabulary usage for AI and human-generated text. We define vocabulary usage in two ways: structural differences and keyword differences. Structural differences are evaluated by converting text into Vocabulary-Managment Profiles, initially used for discourse analysis. Through VMPs, we can treat the text data as a time series, allowing an evaluation by implementing Dynamic time-warping distance measures and subsequently deriving similarity scores to provide an indication of whether the structural dynamics in AI texts resemble human texts. To analyze keywords, we use a measure that emphasizes frequency and dispersion to source {`}key{'} keywords. A qualitative approach is then applied, noting thematic differences between human and AI writing.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="durward-thomson-2024-evaluating">
<titleInfo>
<title>Evaluating Vocabulary Usage in LLMs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Matthew</namePart>
<namePart type="family">Durward</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Thomson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Kochmar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie</namePart>
<namePart type="family">Bexte</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jill</namePart>
<namePart type="family">Burstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrea</namePart>
<namePart type="family">Horbach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ronja</namePart>
<namePart type="family">Laarmann-Quante</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anaïs</namePart>
<namePart type="family">Tack</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Victoria</namePart>
<namePart type="family">Yaneva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zheng</namePart>
<namePart type="family">Yuan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The paper focuses on investigating vocabulary usage for AI and human-generated text. We define vocabulary usage in two ways: structural differences and keyword differences. Structural differences are evaluated by converting text into Vocabulary-Managment Profiles, initially used for discourse analysis. Through VMPs, we can treat the text data as a time series, allowing an evaluation by implementing Dynamic time-warping distance measures and subsequently deriving similarity scores to provide an indication of whether the structural dynamics in AI texts resemble human texts. To analyze keywords, we use a measure that emphasizes frequency and dispersion to source ‘key’ keywords. A qualitative approach is then applied, noting thematic differences between human and AI writing.</abstract>
<identifier type="citekey">durward-thomson-2024-evaluating</identifier>
<location>
<url>https://aclanthology.org/2024.bea-1.22</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>266</start>
<end>282</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluating Vocabulary Usage in LLMs
%A Durward, Matthew
%A Thomson, Christopher
%Y Kochmar, Ekaterina
%Y Bexte, Marie
%Y Burstein, Jill
%Y Horbach, Andrea
%Y Laarmann-Quante, Ronja
%Y Tack, Anaïs
%Y Yaneva, Victoria
%Y Yuan, Zheng
%S Proceedings of the 19th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F durward-thomson-2024-evaluating
%X The paper focuses on investigating vocabulary usage for AI and human-generated text. We define vocabulary usage in two ways: structural differences and keyword differences. Structural differences are evaluated by converting text into Vocabulary-Managment Profiles, initially used for discourse analysis. Through VMPs, we can treat the text data as a time series, allowing an evaluation by implementing Dynamic time-warping distance measures and subsequently deriving similarity scores to provide an indication of whether the structural dynamics in AI texts resemble human texts. To analyze keywords, we use a measure that emphasizes frequency and dispersion to source ‘key’ keywords. A qualitative approach is then applied, noting thematic differences between human and AI writing.
%U https://aclanthology.org/2024.bea-1.22
%P 266-282
Markdown (Informal)
[Evaluating Vocabulary Usage in LLMs](https://aclanthology.org/2024.bea-1.22) (Durward & Thomson, BEA 2024)
ACL
- Matthew Durward and Christopher Thomson. 2024. Evaluating Vocabulary Usage in LLMs. In Proceedings of the 19th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2024), pages 266–282, Mexico City, Mexico. Association for Computational Linguistics.