@inproceedings{shaib-etal-2024-detection,
title = "Detection and Measurement of Syntactic Templates in Generated Text",
author = "Shaib, Chantal and
Elazar, Yanai and
Li, Junyi Jessy and
Wallace, Byron C",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-main.368",
doi = "10.18653/v1/2024.emnlp-main.368",
pages = "6416--6431",
abstract = "The diversity of text can be measured beyond word-level features, however existing diversity evaluation focuses primarily on word-level features. Here we propose a method for evaluating diversity over syntactic features to characterize general repetition in models, beyond frequent $n$-grams. Specifically, we define \textit{syntactic templates} (e.g., strings comprising parts-of-speech) and show that models tend to produce templated text in downstream tasks at a higher rate than what is found in human-reference textsWe find that most (76{\%}) templates in model-generated text can be found in pre-training data (compared to only 35{\%} of human-authored text), and are not overwritten during fine-tuning or alignment processes such as RLHF. The connection between templates in generated text and the pre-training data allows us to analyze syntactic templates in models where we do not have the pre-training data.We also find that templates as features are able to differentiate between models, tasks, and domains, and are useful for qualitatively evaluating common model constructions.Finally, we demonstrate the use of templates as a useful tool for analyzing style memorization of training data in LLMs.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="shaib-etal-2024-detection">
<titleInfo>
<title>Detection and Measurement of Syntactic Templates in Generated Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chantal</namePart>
<namePart type="family">Shaib</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yanai</namePart>
<namePart type="family">Elazar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junyi</namePart>
<namePart type="given">Jessy</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Byron</namePart>
<namePart type="given">C</namePart>
<namePart type="family">Wallace</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The diversity of text can be measured beyond word-level features, however existing diversity evaluation focuses primarily on word-level features. Here we propose a method for evaluating diversity over syntactic features to characterize general repetition in models, beyond frequent n-grams. Specifically, we define syntactic templates (e.g., strings comprising parts-of-speech) and show that models tend to produce templated text in downstream tasks at a higher rate than what is found in human-reference textsWe find that most (76%) templates in model-generated text can be found in pre-training data (compared to only 35% of human-authored text), and are not overwritten during fine-tuning or alignment processes such as RLHF. The connection between templates in generated text and the pre-training data allows us to analyze syntactic templates in models where we do not have the pre-training data.We also find that templates as features are able to differentiate between models, tasks, and domains, and are useful for qualitatively evaluating common model constructions.Finally, we demonstrate the use of templates as a useful tool for analyzing style memorization of training data in LLMs.</abstract>
<identifier type="citekey">shaib-etal-2024-detection</identifier>
<identifier type="doi">10.18653/v1/2024.emnlp-main.368</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.368</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>6416</start>
<end>6431</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Detection and Measurement of Syntactic Templates in Generated Text
%A Shaib, Chantal
%A Elazar, Yanai
%A Li, Junyi Jessy
%A Wallace, Byron C.
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F shaib-etal-2024-detection
%X The diversity of text can be measured beyond word-level features, however existing diversity evaluation focuses primarily on word-level features. Here we propose a method for evaluating diversity over syntactic features to characterize general repetition in models, beyond frequent n-grams. Specifically, we define syntactic templates (e.g., strings comprising parts-of-speech) and show that models tend to produce templated text in downstream tasks at a higher rate than what is found in human-reference textsWe find that most (76%) templates in model-generated text can be found in pre-training data (compared to only 35% of human-authored text), and are not overwritten during fine-tuning or alignment processes such as RLHF. The connection between templates in generated text and the pre-training data allows us to analyze syntactic templates in models where we do not have the pre-training data.We also find that templates as features are able to differentiate between models, tasks, and domains, and are useful for qualitatively evaluating common model constructions.Finally, we demonstrate the use of templates as a useful tool for analyzing style memorization of training data in LLMs.
%R 10.18653/v1/2024.emnlp-main.368
%U https://aclanthology.org/2024.emnlp-main.368
%U https://doi.org/10.18653/v1/2024.emnlp-main.368
%P 6416-6431
Markdown (Informal)
[Detection and Measurement of Syntactic Templates in Generated Text](https://aclanthology.org/2024.emnlp-main.368) (Shaib et al., EMNLP 2024)
ACL