% Conference paper in the Findings of EMNLP 2025 proceedings.
% Case-sensitive words in the title are protected with whole-word braces so
% that sentence-casing styles keep them intact. The address field holds the
% location given in the original record (Suzhou, China).
@inproceedings{shahgir-etal-2025-expertgenqa,
  title     = {{ExpertGenQA}: Open-ended {QA} generation in Specialized Domains},
  author    = {Shahgir, Haz Sameen and
               Lim, Chansong and
               Chen, Jia and
               Papalexakis, Evangelos E. and
               Dong, Yue},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2025},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-emnlp.159/},
  pages     = {2934--2955},
  isbn      = {979-8-89176-335-7},
  abstract  = {Generating high-quality question{--}answer (QA) pairs for specialized technical domains is essential for advancing knowledge comprehension, yet remains challenging. Existing methods often yield generic or shallow questions that fail to reflect the depth and structure of expert-written examples. We propose ExpertGenQA, a generation protocol that combines few-shot prompting with dual categorization by topic and question style to produce more diverse and cognitively meaningful QA pairs. ExpertGenQA achieves twice the efficiency of standard few-shot methods while maintaining 94.4{\%} topic coverage. Unlike LLM-based judges, which often favor surface fluency, Bloom{'}s Taxonomy analysis shows that ExpertGenQA better captures expert-level cognitive complexity. When used to train retrieval systems, our questions improve top-1 accuracy by 13.02{\%}, demonstrating their practical value for domain-specific applications.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="shahgir-etal-2025-expertgenqa">
    <!-- Title of the paper -->
    <titleInfo>
      <title>ExpertGenQA: Open-ended QA generation in Specialized Domains</title>
    </titleInfo>
    <!-- Authors, in listed order -->
    <name type="personal">
      <namePart type="given">Haz</namePart>
      <namePart type="given">Sameen</namePart>
      <namePart type="family">Shahgir</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chansong</namePart>
      <namePart type="family">Lim</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jia</namePart>
      <namePart type="family">Chen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Evangelos</namePart>
      <namePart type="given">E</namePart>
      <namePart type="family">Papalexakis</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yue</namePart>
      <namePart type="family">Dong</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host publication: the proceedings volume, its editors, publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EMNLP 2025</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Christos</namePart>
        <namePart type="family">Christodoulopoulos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Tanmoy</namePart>
        <namePart type="family">Chakraborty</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Carolyn</namePart>
        <namePart type="family">Rose</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Violet</namePart>
        <namePart type="family">Peng</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Suzhou, China</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-335-7</identifier>
    </relatedItem>
    <abstract>Generating high-quality question–answer (QA) pairs for specialized technical domains is essential for advancing knowledge comprehension, yet remains challenging. Existing methods often yield generic or shallow questions that fail to reflect the depth and structure of expert-written examples. We propose ExpertGenQA, a generation protocol that combines few-shot prompting with dual categorization by topic and question style to produce more diverse and cognitively meaningful QA pairs. ExpertGenQA achieves twice the efficiency of standard few-shot methods while maintaining 94.4% topic coverage. Unlike LLM-based judges, which often favor surface fluency, Bloom’s Taxonomy analysis shows that ExpertGenQA better captures expert-level cognitive complexity. When used to train retrieval systems, our questions improve top-1 accuracy by 13.02%, demonstrating their practical value for domain-specific applications.</abstract>
    <identifier type="citekey">shahgir-etal-2025-expertgenqa</identifier>
    <location>
      <url>https://aclanthology.org/2025.findings-emnlp.159/</url>
    </location>
    <!-- Issue date and page range of the paper within the host volume -->
    <part>
      <date>2025-11</date>
      <extent unit="page">
        <start>2934</start>
        <end>2955</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T ExpertGenQA: Open-ended QA generation in Specialized Domains
%A Shahgir, Haz Sameen
%A Lim, Chansong
%A Chen, Jia
%A Papalexakis, Evangelos E.
%A Dong, Yue
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Findings of the Association for Computational Linguistics: EMNLP 2025
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-335-7
%F shahgir-etal-2025-expertgenqa
%X Generating high-quality question–answer (QA) pairs for specialized technical domains is essential for advancing knowledge comprehension, yet remains challenging. Existing methods often yield generic or shallow questions that fail to reflect the depth and structure of expert-written examples. We propose ExpertGenQA, a generation protocol that combines few-shot prompting with dual categorization by topic and question style to produce more diverse and cognitively meaningful QA pairs. ExpertGenQA achieves twice the efficiency of standard few-shot methods while maintaining 94.4% topic coverage. Unlike LLM-based judges, which often favor surface fluency, Bloom’s Taxonomy analysis shows that ExpertGenQA better captures expert-level cognitive complexity. When used to train retrieval systems, our questions improve top-1 accuracy by 13.02%, demonstrating their practical value for domain-specific applications.
%U https://aclanthology.org/2025.findings-emnlp.159/
%P 2934-2955
Markdown (Informal)
[ExpertGenQA: Open-ended QA generation in Specialized Domains](https://aclanthology.org/2025.findings-emnlp.159/) (Shahgir et al., Findings 2025)
ACL
- Haz Sameen Shahgir, Chansong Lim, Jia Chen, Evangelos E. Papalexakis, and Yue Dong. 2025. ExpertGenQA: Open-ended QA generation in Specialized Domains. In Findings of the Association for Computational Linguistics: EMNLP 2025, pages 2934–2955, Suzhou, China. Association for Computational Linguistics.