% Workshop paper; bibliographic record hosted on the ACL Anthology (see the url field).
% NOTE(review): the editors entered as "Guillena, Rafael Munoz" and "Cerda, Raul Garcia"
% may have compound family names (given/family split possibly wrong) -- confirm before changing.
% NOTE(review): address looks like the event venue while publisher embeds a city -- confirm
% whether the target style expects the publisher's city in address instead.
@inproceedings{nainia-etal-2025-beyond,
  author    = {Nainia, Ayoub and Vignes-Lebbe, R{\'e}gine and Mousannif, Hajar and Zahir, Jihad},
  title     = {Beyond {BLEU}: Ethical Risks of Misleading Evaluation in Domain-Specific {QA} with {LLM}s},
  editor    = {Picazo-Izquierdo, Alicia and Estevanell-Valladares, Ernesto Luis and Mitkov, Ruslan and Guillena, Rafael Mu{\~n}oz and Cerd{\'a}, Ra{\'u}l Garc{\'i}a},
  booktitle = {Proceedings of the First Workshop on Comparative Performance Evaluation: From Rules to Language Models},
  year      = {2025},
  month     = sep,
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd., Shoumen, Bulgaria},
  pages     = {77--86},
  url       = {https://aclanthology.org/2025.r2lm-1.9/},
}