@inproceedings{bibal-etal-2025-automating,
title = "Automating Annotation Guideline Improvements using {LLM}s: A Case Study",
author = "Bibal, Adrien and
Gerlek, Nathaniel and
Muric, Goran and
Boschee, Elizabeth and
Fincke, Steven C. and
Ross, Mike and
Minton, Steven N.",
editor = "Roth, Michael and
Schlechtweg, Dominik",
booktitle = "Proceedings of Context and Meaning: Navigating Disagreements in NLP Annotation",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2025.comedi-1.13/",
pages = "129--144",
abstract = "Annotating texts can be a tedious task, especially when texts are noisy. At the root of the issue, guidelines are not always optimized enough to be able to perform the required annotation task. In difficult cases, complex workflows are designed to be able to reach the best possible guidelines. However, crowdsource workers are commonly recruited to go through these complex workflows, limiting the number of iterations over the workflows, and therefore, the possible results because of the slow speed and the high cost of workers. In this paper, our case study, based on the entity recognition problem, suggests that LLMs can help produce guidelines of high quality (inter-annotator agreement going from 0.593 to 0.84 when improving WNUT-17's guidelines), while being faster and cheaper than crowdsource workers."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bibal-etal-2025-automating">
<titleInfo>
<title>Automating Annotation Guideline Improvements using LLMs: A Case Study</title>
</titleInfo>
<name type="personal">
<namePart type="given">Adrien</namePart>
<namePart type="family">Bibal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nathaniel</namePart>
<namePart type="family">Gerlek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Goran</namePart>
<namePart type="family">Muric</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Boschee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="given">C</namePart>
<namePart type="family">Fincke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mike</namePart>
<namePart type="family">Ross</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="given">N</namePart>
<namePart type="family">Minton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of Context and Meaning: Navigating Disagreements in NLP Annotation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Roth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dominik</namePart>
<namePart type="family">Schlechtweg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Annotating texts can be a tedious task, especially when texts are noisy. At the root of the issue, guidelines are not always optimized enough to be able to perform the required annotation task. In difficult cases, complex workflows are designed to be able to reach the best possible guidelines. However, crowdsource workers are commonly recruited to go through these complex workflows, limiting the number of iterations over the workflows, and therefore, the possible results because of the slow speed and the high cost of workers. In this paper, our case study, based on the entity recognition problem, suggests that LLMs can help produce guidelines of high quality (inter-annotator agreement going from 0.593 to 0.84 when improving WNUT-17’s guidelines), while being faster and cheaper than crowdsource workers.</abstract>
<identifier type="citekey">bibal-etal-2025-automating</identifier>
<location>
<url>https://aclanthology.org/2025.comedi-1.13/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>129</start>
<end>144</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Automating Annotation Guideline Improvements using LLMs: A Case Study
%A Bibal, Adrien
%A Gerlek, Nathaniel
%A Muric, Goran
%A Boschee, Elizabeth
%A Fincke, Steven C.
%A Ross, Mike
%A Minton, Steven N.
%Y Roth, Michael
%Y Schlechtweg, Dominik
%S Proceedings of Context and Meaning: Navigating Disagreements in NLP Annotation
%D 2025
%8 January
%I International Committee on Computational Linguistics
%C Abu Dhabi, UAE
%F bibal-etal-2025-automating
%X Annotating texts can be a tedious task, especially when texts are noisy. At the root of the issue, guidelines are not always optimized enough to be able to perform the required annotation task. In difficult cases, complex workflows are designed to be able to reach the best possible guidelines. However, crowdsource workers are commonly recruited to go through these complex workflows, limiting the number of iterations over the workflows, and therefore, the possible results because of the slow speed and the high cost of workers. In this paper, our case study, based on the entity recognition problem, suggests that LLMs can help produce guidelines of high quality (inter-annotator agreement going from 0.593 to 0.84 when improving WNUT-17’s guidelines), while being faster and cheaper than crowdsource workers.
%U https://aclanthology.org/2025.comedi-1.13/
%P 129-144
Markdown (Informal)
[Automating Annotation Guideline Improvements using LLMs: A Case Study](https://aclanthology.org/2025.comedi-1.13/) (Bibal et al., CoMeDi 2025)
ACL
- Adrien Bibal, Nathaniel Gerlek, Goran Muric, Elizabeth Boschee, Steven C. Fincke, Mike Ross, and Steven N. Minton. 2025. Automating Annotation Guideline Improvements using LLMs: A Case Study. In Proceedings of Context and Meaning: Navigating Disagreements in NLP Annotation, pages 129–144, Abu Dhabi, UAE. International Committee on Computational Linguistics.