@inproceedings{ferreira-etal-2025-framework,
    title = "A Framework for Fine-Grained Complexity Control in Health Answer Generation",
    author = "Ferreira, Daniel Jorge Bernardo and
      Almeida, Tiago and
      Matos, S{\'e}rgio",
    editor = "Zhao, Jin and
      Wang, Mingyang and
      Liu, Zhu",
    booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)",
    month = jul,
    year = "2025",
    address = "Vienna, Austria",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.acl-srw.87/",
    doi = "10.18653/v1/2025.acl-srw.87",
    pages = "1111--1131",
    isbn = "979-8-89176-254-1",
    abstract = "Health literacy plays a critical role in ensuring people can access, understand, and act on medical information. However, much of the health content available today is too complex for many people, and simplifying these texts manually is time-consuming and difficult to do at scale. To overcome this, we developed a new framework for automatically generating health answers at multiple, precisely controlled complexity levels. We began with a thorough analysis of 166 linguistic features, which we then refined into 13 key metrics that reliably differentiate between simple and complex medical texts. From these metrics, we derived a robust complexity scoring formula, combining them with weights learned from a logistic regression model. This formula allowed us to create a large, multi-level dataset of health question-answer pairs covering 21 distinct complexity levels, ranging from elementary patient-friendly explanations to highly technical summaries. Finally, we fine-tuned a Llama-3.1-8B-Instruct model using ``control codes'' on this dataset, giving users precise control over the complexity of the generated text and empowering them to select the level of detail and technicality they need."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="ferreira-etal-2025-framework">
    <titleInfo>
      <title>A Framework for Fine-Grained Complexity Control in Health Answer Generation</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Daniel</namePart>
      <namePart type="given">Jorge</namePart>
      <namePart type="given">Bernardo</namePart>
      <namePart type="family">Ferreira</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tiago</namePart>
      <namePart type="family">Almeida</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sérgio</namePart>
      <namePart type="family">Matos</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Jin</namePart>
        <namePart type="family">Zhao</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mingyang</namePart>
        <namePart type="family">Wang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Zhu</namePart>
        <namePart type="family">Liu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-254-1</identifier>
    </relatedItem>
    <abstract>Health literacy plays a critical role in ensuring people can access, understand, and act on medical information. However, much of the health content available today is too complex for many people, and simplifying these texts manually is time-consuming and difficult to do at scale. To overcome this, we developed a new framework for automatically generating health answers at multiple, precisely controlled complexity levels. We began with a thorough analysis of 166 linguistic features, which we then refined into 13 key metrics that reliably differentiate between simple and complex medical texts. From these metrics, we derived a robust complexity scoring formula, combining them with weights learned from a logistic regression model. This formula allowed us to create a large, multi-level dataset of health question-answer pairs covering 21 distinct complexity levels, ranging from elementary patient-friendly explanations to highly technical summaries. Finally, we fine-tuned a Llama-3.1-8B-Instruct model using “control codes” on this dataset, giving users precise control over the complexity of the generated text and empowering them to select the level of detail and technicality they need.</abstract>
    <identifier type="citekey">ferreira-etal-2025-framework</identifier>
    <identifier type="doi">10.18653/v1/2025.acl-srw.87</identifier>
    <location>
      <url>https://aclanthology.org/2025.acl-srw.87/</url>
    </location>
    <part>
      <date>2025-07</date>
      <extent unit="page">
        <start>1111</start>
        <end>1131</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A Framework for Fine-Grained Complexity Control in Health Answer Generation
%A Ferreira, Daniel Jorge Bernardo
%A Almeida, Tiago
%A Matos, Sérgio
%Y Zhao, Jin
%Y Wang, Mingyang
%Y Liu, Zhu
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-254-1
%F ferreira-etal-2025-framework
%X Health literacy plays a critical role in ensuring people can access, understand, and act on medical information. However, much of the health content available today is too complex for many people, and simplifying these texts manually is time-consuming and difficult to do at scale. To overcome this, we developed a new framework for automatically generating health answers at multiple, precisely controlled complexity levels. We began with a thorough analysis of 166 linguistic features, which we then refined into 13 key metrics that reliably differentiate between simple and complex medical texts. From these metrics, we derived a robust complexity scoring formula, combining them with weights learned from a logistic regression model. This formula allowed us to create a large, multi-level dataset of health question-answer pairs covering 21 distinct complexity levels, ranging from elementary patient-friendly explanations to highly technical summaries. Finally, we fine-tuned a Llama-3.1-8B-Instruct model using “control codes” on this dataset, giving users precise control over the complexity of the generated text and empowering them to select the level of detail and technicality they need.
%R 10.18653/v1/2025.acl-srw.87
%U https://aclanthology.org/2025.acl-srw.87/
%U https://doi.org/10.18653/v1/2025.acl-srw.87
%P 1111-1131
Markdown (Informal)
[A Framework for Fine-Grained Complexity Control in Health Answer Generation](https://aclanthology.org/2025.acl-srw.87/) (Ferreira et al., ACL 2025)
ACL