% ACL Anthology BibTeX export for paper 2025.ranlp-1.3.
% NOTE(review): `address` appears to hold the conference venue (Varna) while the
% publisher's city (Shoumen) is embedded in `publisher` -- confirm before normalising.
@inproceedings{afzal-etal-2025-candidate,
  title     = {Candidate Profile Summarization: A {RAG} Approach with Synthetic Data Generation for Tech Jobs},
  author    = {Afzal, Anum and Subedi, Ishwor and Matthes, Florian},
  editor    = {Angelova, Galia and Kunilovskaya, Maria and Escribe, Marie and Mitkov, Ruslan},
  booktitle = {Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era},
  month     = sep,
  year      = {2025},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd., Shoumen, Bulgaria},
  url       = {https://aclanthology.org/2025.ranlp-1.3/},
  pages     = {22--31},
  abstract  = {As Large Language Models (LLMs) become increasingly applied to resume evaluation and candidate selection, this study investigates the effectiveness of using in-context example resumes to generate synthetic data. We compare a Retrieval-Augmented Generation (RAG) system to a Named Entity Recognition (NER)-based baseline for job-resume matching, generating diverse synthetic resumes with models like Mixtral-8x22B-Instruct-v0.1. Our results show that combining BERT, ROUGE, and Jaccard similarity metrics effectively assesses synthetic resume quality, ensuring the least lexical overlap along with high similarity and diversity. Our experiments show that RAG notably outperforms NER for retrieval tasks{---}though generation-based summarization remains challenged by role differentiation. Human evaluation further highlights issues of factual accuracy and completeness, emphasizing the importance of in-context examples, prompt engineering, and improvements in summary generation for robust, automated candidate selection.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS v3 record for citekey afzal-etal-2025-candidate: three authors on the paper itself;
       the host relatedItem describes the proceedings volume and its four editors. -->
  <mods ID="afzal-etal-2025-candidate">
    <titleInfo>
      <title>Candidate Profile Summarization: A RAG Approach with Synthetic Data Generation for Tech Jobs</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Anum</namePart>
      <namePart type="family">Afzal</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ishwor</namePart>
      <namePart type="family">Subedi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Florian</namePart>
      <namePart type="family">Matthes</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Galia</namePart>
        <namePart type="family">Angelova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Kunilovskaya</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marie</namePart>
        <namePart type="family">Escribe</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>As Large Language Models (LLMs) become increasingly applied to resume evaluation and candidate selection, this study investigates the effectiveness of using in-context example resumes to generate synthetic data. We compare a Retrieval-Augmented Generation (RAG) system to a Named Entity Recognition (NER)-based baseline for job-resume matching, generating diverse synthetic resumes with models like Mixtral-8x22B-Instruct-v0.1. Our results show that combining BERT, ROUGE, and Jaccard similarity metrics effectively assesses synthetic resume quality, ensuring the least lexical overlap along with high similarity and diversity. Our experiments show that RAG notably outperforms NER for retrieval tasks—though generation-based summarization remains challenged by role differentiation. Human evaluation further highlights issues of factual accuracy and completeness, emphasizing the importance of in-context examples, prompt engineering, and improvements in summary generation for robust, automated candidate selection.</abstract>
    <identifier type="citekey">afzal-etal-2025-candidate</identifier>
    <location>
      <url>https://aclanthology.org/2025.ranlp-1.3/</url>
    </location>
    <part>
      <date>2025-09</date>
      <extent unit="page">
        <start>22</start>
        <end>31</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Candidate Profile Summarization: A RAG Approach with Synthetic Data Generation for Tech Jobs
%A Afzal, Anum
%A Subedi, Ishwor
%A Matthes, Florian
%Y Angelova, Galia
%Y Kunilovskaya, Maria
%Y Escribe, Marie
%Y Mitkov, Ruslan
%S Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F afzal-etal-2025-candidate
%X As Large Language Models (LLMs) become increasingly applied to resume evaluation and candidate selection, this study investigates the effectiveness of using in-context example resumes to generate synthetic data. We compare a Retrieval-Augmented Generation (RAG) system to a Named Entity Recognition (NER)-based baseline for job-resume matching, generating diverse synthetic resumes with models like Mixtral-8x22B-Instruct-v0.1. Our results show that combining BERT, ROUGE, and Jaccard similarity metrics effectively assesses synthetic resume quality, ensuring the least lexical overlap along with high similarity and diversity. Our experiments show that RAG notably outperforms NER for retrieval tasks—though generation-based summarization remains challenged by role differentiation. Human evaluation further highlights issues of factual accuracy and completeness, emphasizing the importance of in-context examples, prompt engineering, and improvements in summary generation for robust, automated candidate selection.
%U https://aclanthology.org/2025.ranlp-1.3/
%P 22-31
Markdown (Informal)
[Candidate Profile Summarization: A RAG Approach with Synthetic Data Generation for Tech Jobs](https://aclanthology.org/2025.ranlp-1.3/) (Afzal et al., RANLP 2025)
ACL