@inproceedings{sertkan-etal-2023-ranger,
title = "Ranger: A Toolkit for Effect-Size Based Multi-Task Evaluation",
author = {Sertkan, Mete and
Althammer, Sophia and
Hofst{\"a}tter, Sebastian},
editor = "Bollegala, Danushka and
Huang, Ruihong and
Ritter, Alan",
booktitle = "Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.acl-demo.56/",
doi = "10.18653/v1/2023.acl-demo.56",
pages = "581--587",
abstract = "In this paper, we introduce Ranger - a toolkit to facilitate the easy use of effect-size-based meta-analysis for multi-task evaluation in NLP and IR. We observed that our communities often face the challenge of aggregating results over incomparable metrics and scenarios, which makes conclusions and take-away messages less reliable. With Ranger, we aim to address this issue by providing a task-agnostic toolkit that combines the effect of a treatment on multiple tasks into one statistical evaluation, allowing for comparison of metrics and computation of an overall summary effect. Our toolkit produces publication-ready forest plots that enable clear communication of evaluation results over multiple tasks. Our goal with the ready-to-use Ranger toolkit is to promote robust, effect-size-based evaluation and improve evaluation standards in the community. We provide two case studies for common IR and NLP settings to highlight Ranger`s benefits."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="sertkan-etal-2023-ranger">
    <titleInfo>
      <title>Ranger: A Toolkit for Effect-Size Based Multi-Task Evaluation</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Mete</namePart>
      <namePart type="family">Sertkan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sophia</namePart>
      <namePart type="family">Althammer</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sebastian</namePart>
      <namePart type="family">Hofstätter</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Danushka</namePart>
        <namePart type="family">Bollegala</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ruihong</namePart>
        <namePart type="family">Huang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alan</namePart>
        <namePart type="family">Ritter</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Toronto, Canada</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this paper, we introduce Ranger - a toolkit to facilitate the easy use of effect-size-based meta-analysis for multi-task evaluation in NLP and IR. We observed that our communities often face the challenge of aggregating results over incomparable metrics and scenarios, which makes conclusions and take-away messages less reliable. With Ranger, we aim to address this issue by providing a task-agnostic toolkit that combines the effect of a treatment on multiple tasks into one statistical evaluation, allowing for comparison of metrics and computation of an overall summary effect. Our toolkit produces publication-ready forest plots that enable clear communication of evaluation results over multiple tasks. Our goal with the ready-to-use Ranger toolkit is to promote robust, effect-size-based evaluation and improve evaluation standards in the community. We provide two case studies for common IR and NLP settings to highlight Ranger's benefits.</abstract>
    <identifier type="citekey">sertkan-etal-2023-ranger</identifier>
    <identifier type="doi">10.18653/v1/2023.acl-demo.56</identifier>
    <location>
      <url>https://aclanthology.org/2023.acl-demo.56/</url>
    </location>
    <part>
      <date>2023-07</date>
      <extent unit="page">
        <start>581</start>
        <end>587</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Ranger: A Toolkit for Effect-Size Based Multi-Task Evaluation
%A Sertkan, Mete
%A Althammer, Sophia
%A Hofstätter, Sebastian
%Y Bollegala, Danushka
%Y Huang, Ruihong
%Y Ritter, Alan
%S Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F sertkan-etal-2023-ranger
%X In this paper, we introduce Ranger - a toolkit to facilitate the easy use of effect-size-based meta-analysis for multi-task evaluation in NLP and IR. We observed that our communities often face the challenge of aggregating results over incomparable metrics and scenarios, which makes conclusions and take-away messages less reliable. With Ranger, we aim to address this issue by providing a task-agnostic toolkit that combines the effect of a treatment on multiple tasks into one statistical evaluation, allowing for comparison of metrics and computation of an overall summary effect. Our toolkit produces publication-ready forest plots that enable clear communication of evaluation results over multiple tasks. Our goal with the ready-to-use Ranger toolkit is to promote robust, effect-size-based evaluation and improve evaluation standards in the community. We provide two case studies for common IR and NLP settings to highlight Ranger's benefits.
%R 10.18653/v1/2023.acl-demo.56
%U https://aclanthology.org/2023.acl-demo.56/
%U https://doi.org/10.18653/v1/2023.acl-demo.56
%P 581-587
Markdown (Informal)
[Ranger: A Toolkit for Effect-Size Based Multi-Task Evaluation](https://aclanthology.org/2023.acl-demo.56/) (Sertkan et al., ACL 2023)
ACL
Mete Sertkan, Sophia Althammer, and Sebastian Hofstätter. 2023. Ranger: A Toolkit for Effect-Size Based Multi-Task Evaluation. In Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations), pages 581–587, Toronto, Canada. Association for Computational Linguistics.