@inproceedings{imperial-tayyar-madabushi-2024-specialex,
title = "{S}pecia{L}ex: A Benchmark for In-Context Specialized Lexicon Learning",
author = "Imperial, Joseph Marvin and
Tayyar Madabushi, Harish",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.52",
doi = "10.18653/v1/2024.findings-emnlp.52",
pages = "930--965",
abstract = "Specialized lexicons are collections of words with associated constraints such as special definitions, specific roles, and intended target audiences. These constraints are necessary for content generation and documentation tasks (e.g., writing technical manuals or children{'}s reading materials), where the goal is to reduce the ambiguity of text content and increase its overall readability for a specific group of audience. Understanding how large language models can capture these constraints can help researchers build better, more impactful tools for wider use beyond the NLP community. Towards this end, we introduce SpeciaLex, a benchmark for evaluating a language model{'}s ability to follow specialized lexicon-based constraints across 18 diverse subtasks with 1,785 test instances covering core tasks of Checking, Identification, Rewriting, and Open Generation. We present an empirical evaluation of 15 open and closed-source LLMs and discuss insights on how factors such as model scale, openness, setup, and recency affect performance upon evaluating with the benchmark.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="imperial-tayyar-madabushi-2024-specialex">
<titleInfo>
<title>SpeciaLex: A Benchmark for In-Context Specialized Lexicon Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="given">Marvin</namePart>
<namePart type="family">Imperial</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harish</namePart>
<namePart type="family">Tayyar Madabushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Specialized lexicons are collections of words with associated constraints such as special definitions, specific roles, and intended target audiences. These constraints are necessary for content generation and documentation tasks (e.g., writing technical manuals or children’s reading materials), where the goal is to reduce the ambiguity of text content and increase its overall readability for a specific group of audience. Understanding how large language models can capture these constraints can help researchers build better, more impactful tools for wider use beyond the NLP community. Towards this end, we introduce SpeciaLex, a benchmark for evaluating a language model’s ability to follow specialized lexicon-based constraints across 18 diverse subtasks with 1,785 test instances covering core tasks of Checking, Identification, Rewriting, and Open Generation. We present an empirical evaluation of 15 open and closed-source LLMs and discuss insights on how factors such as model scale, openness, setup, and recency affect performance upon evaluating with the benchmark.</abstract>
<identifier type="citekey">imperial-tayyar-madabushi-2024-specialex</identifier>
<identifier type="doi">10.18653/v1/2024.findings-emnlp.52</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.52</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>930</start>
<end>965</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SpeciaLex: A Benchmark for In-Context Specialized Lexicon Learning
%A Imperial, Joseph Marvin
%A Tayyar Madabushi, Harish
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F imperial-tayyar-madabushi-2024-specialex
%X Specialized lexicons are collections of words with associated constraints such as special definitions, specific roles, and intended target audiences. These constraints are necessary for content generation and documentation tasks (e.g., writing technical manuals or children’s reading materials), where the goal is to reduce the ambiguity of text content and increase its overall readability for a specific group of audience. Understanding how large language models can capture these constraints can help researchers build better, more impactful tools for wider use beyond the NLP community. Towards this end, we introduce SpeciaLex, a benchmark for evaluating a language model’s ability to follow specialized lexicon-based constraints across 18 diverse subtasks with 1,785 test instances covering core tasks of Checking, Identification, Rewriting, and Open Generation. We present an empirical evaluation of 15 open and closed-source LLMs and discuss insights on how factors such as model scale, openness, setup, and recency affect performance upon evaluating with the benchmark.
%R 10.18653/v1/2024.findings-emnlp.52
%U https://aclanthology.org/2024.findings-emnlp.52
%U https://doi.org/10.18653/v1/2024.findings-emnlp.52
%P 930-965
Markdown (Informal)
[SpeciaLex: A Benchmark for In-Context Specialized Lexicon Learning](https://aclanthology.org/2024.findings-emnlp.52) (Imperial & Tayyar Madabushi, Findings 2024)
ACL