% BibTeX record for the AnnoHID paper (ACL 2026 System Demonstrations, pages 683--691).
@inproceedings{ningtyas-etal-2026-annohid,
    title = "{A}nno{HID}: {LLM}-Assisted Annotation Framework for Low-Resource Medical Texts",
    author = "Ningtyas, Annisa Maulida and
      Herwanto, Guntur Budi and
      Sari, Yunita and
      Putri, Rifki Afina and
      Kovacevic, Filip and
      El-Ebshihy, Alaa and
      Arzt, Varvara and
      Piroi, Florina",
    editor = "Durrett, Greg and
      Jian, Ping",
    booktitle = "Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics (Volume 3: System Demonstrations)",
    month = jul,
    year = "2026",
    address = "San Diego, California, United States",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2026.acl-demo.67/",
    pages = "683--691",
    isbn = "979-8-89176-392-0",
    abstract = "This paper introduces AnnoHID, a semi-automated annotation framework designed for medical texts in low-resource languages. The system integrates large language models (LLMs) for pre-annotation and human validation to support efficient and consistent annotation. We demonstrate its application to Bahasa Indonesia medical social media texts from Alodokter, a medical Q\&A platform, for Named Entity Recognition (NER) and Medical Concept Normalization (MCN). We conducted a user study with 21 participants to demonstrate the effectiveness of AnnoHID. The results show that LLM-assisted annotation yields higher inter-annotator agreement for both NER ($\kappa$ = 0.76) and MCN ($\kappa$ = 0.63) and that human review improves raw LLM NER output, raising the F1 score from 0.39 to 0.45. However, LLM assistance did not reduce annotation time and may introduce normalization bias in MCN. The framework is multilingual, human-in-the-loop, and interoperable with standard medical terminologies, such as SNOMED-CT. Future work focuses on mitigating pre-annotation bias, reducing annotation overhead, and scaling evaluations to larger datasets and additional low-resource languages."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ningtyas-etal-2026-annohid">
<titleInfo>
<title>AnnoHID: LLM-Assisted Annotation Framework for Low-Resource Medical Texts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Annisa</namePart>
<namePart type="given">Maulida</namePart>
<namePart type="family">Ningtyas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guntur</namePart>
<namePart type="given">Budi</namePart>
<namePart type="family">Herwanto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yunita</namePart>
<namePart type="family">Sari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rifki</namePart>
<namePart type="given">Afina</namePart>
<namePart type="family">Putri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Filip</namePart>
<namePart type="family">Kovacevic</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alaa</namePart>
<namePart type="family">El-Ebshihy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Varvara</namePart>
<namePart type="family">Arzt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Florina</namePart>
<namePart type="family">Piroi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Greg</namePart>
<namePart type="family">Durrett</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ping</namePart>
<namePart type="family">Jian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-392-0</identifier>
</relatedItem>
<abstract>This paper introduces AnnoHID, a semi-automated annotation framework designed for medical texts in low-resource languages. The system integrates large language models (LLMs) for pre-annotation and human validation to support efficient and consistent annotation. We demonstrate its application to Bahasa Indonesia medical social media texts from Alodokter, a medical Q&amp;A platform, for Named Entity Recognition (NER) and Medical Concept Normalization (MCN). We conducted a user study with 21 participants to demonstrate the effectiveness of AnnoHID. The results show that LLM-assisted annotation yields higher inter-annotator agreement for both NER (κ = 0.76) and MCN (κ = 0.63) and that human review improves raw LLM NER output, raising the F1 score from 0.39 to 0.45. However, LLM assistance did not reduce annotation time and may introduce normalization bias in MCN. The framework is multilingual, human-in-the-loop, and interoperable with standard medical terminologies, such as SNOMED-CT. Future work focuses on mitigating pre-annotation bias, reducing annotation overhead, and scaling evaluations to larger datasets and additional low-resource languages.</abstract>
<identifier type="citekey">ningtyas-etal-2026-annohid</identifier>
<location>
<url>https://aclanthology.org/2026.acl-demo.67/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>683</start>
<end>691</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T AnnoHID: LLM-Assisted Annotation Framework for Low-Resource Medical Texts
%A Ningtyas, Annisa Maulida
%A Herwanto, Guntur Budi
%A Sari, Yunita
%A Putri, Rifki Afina
%A Kovacevic, Filip
%A El-Ebshihy, Alaa
%A Arzt, Varvara
%A Piroi, Florina
%Y Durrett, Greg
%Y Jian, Ping
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-392-0
%F ningtyas-etal-2026-annohid
%X This paper introduces AnnoHID, a semi-automated annotation framework designed for medical texts in low-resource languages. The system integrates large language models (LLMs) for pre-annotation and human validation to support efficient and consistent annotation. We demonstrate its application to Bahasa Indonesia medical social media texts from Alodokter, a medical Q&A platform, for Named Entity Recognition (NER) and Medical Concept Normalization (MCN). We conducted a user study with 21 participants to demonstrate the effectiveness of AnnoHID. The results show that LLM-assisted annotation yields higher inter-annotator agreement for both NER (κ = 0.76) and MCN (κ = 0.63) and that human review improves raw LLM NER output, raising the F1 score from 0.39 to 0.45. However, LLM assistance did not reduce annotation time and may introduce normalization bias in MCN. The framework is multilingual, human-in-the-loop, and interoperable with standard medical terminologies, such as SNOMED-CT. Future work focuses on mitigating pre-annotation bias, reducing annotation overhead, and scaling evaluations to larger datasets and additional low-resource languages.
%U https://aclanthology.org/2026.acl-demo.67/
%P 683-691
Markdown (Informal)
[AnnoHID: LLM-Assisted Annotation Framework for Low-Resource Medical Texts](https://aclanthology.org/2026.acl-demo.67/) (Ningtyas et al., ACL 2026)
ACL
- Annisa Maulida Ningtyas, Guntur Budi Herwanto, Yunita Sari, Rifki Afina Putri, Filip Kovacevic, Alaa El-Ebshihy, Varvara Arzt, and Florina Piroi. 2026. AnnoHID: LLM-Assisted Annotation Framework for Low-Resource Medical Texts. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations), pages 683–691, San Diego, California, United States. Association for Computational Linguistics.