@inproceedings{antoine-etal-2024-extrinsic,
title = "Extrinsic evaluation of question generation methods with user journey logs",
author = "Antoine, Elie and
Besnehard, El{\'e}onore and
Bechet, Frederic and
Damnati, Geraldine and
Kergosien, Eric and
Laborderie, Arnaud",
editor = "Balloccu, Simone and
Belz, Anya and
Huidrom, Rudali and
Reiter, Ehud and
Sedoc, Joao and
Thomson, Craig",
booktitle = "Proceedings of the Fourth Workshop on Human Evaluation of NLP Systems (HumEval) @ LREC-COLING 2024",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.humeval-1.6",
pages = "63--70",
abstract = "There is often a significant disparity between the performance of Natural Language Processing (NLP) tools as evaluated on benchmark datasets using metrics like ROUGE or BLEU, and the actual user experience encountered when employing these tools in real-world scenarios. This highlights the critical necessity for user-oriented studies aimed at evaluating user experience concerning the effectiveness of developed methodologies. A primary challenge in such {``}ecological{''} user studies is their assessment of specific configurations of NLP tools, making replication under identical conditions impractical. Consequently, their utility is limited for the automated evaluation and comparison of different configurations of the same tool. The objective of this study is to conduct an {``}ecological{''} evaluation of a question generation within the context of an external task involving document linking. To do this we conducted an {``}\textit{ecological}{''} evaluation of a document linking tool in the context of the exploration of a Social Science archives and from this evaluation, we aim to derive a form of a {``}reference corpus{''} that can be used offline for the automated comparison of models and quantitative tool assessment. This corpus is available on the following link: https://gitlab.lis-lab.fr/archival-public/autogestion-qa-linking",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="antoine-etal-2024-extrinsic">
<titleInfo>
<title>Extrinsic evaluation of question generation methods with user journey logs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Elie</namePart>
<namePart type="family">Antoine</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eléonore</namePart>
<namePart type="family">Besnehard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frederic</namePart>
<namePart type="family">Bechet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Geraldine</namePart>
<namePart type="family">Damnati</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eric</namePart>
<namePart type="family">Kergosien</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arnaud</namePart>
<namePart type="family">Laborderie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on Human Evaluation of NLP Systems (HumEval) @ LREC-COLING 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Simone</namePart>
<namePart type="family">Balloccu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anya</namePart>
<namePart type="family">Belz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rudali</namePart>
<namePart type="family">Huidrom</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ehud</namePart>
<namePart type="family">Reiter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joao</namePart>
<namePart type="family">Sedoc</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Craig</namePart>
<namePart type="family">Thomson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>There is often a significant disparity between the performance of Natural Language Processing (NLP) tools as evaluated on benchmark datasets using metrics like ROUGE or BLEU, and the actual user experience encountered when employing these tools in real-world scenarios. This highlights the critical necessity for user-oriented studies aimed at evaluating user experience concerning the effectiveness of developed methodologies. A primary challenge in such “ecological” user studies is their assessment of specific configurations of NLP tools, making replication under identical conditions impractical. Consequently, their utility is limited for the automated evaluation and comparison of different configurations of the same tool. The objective of this study is to conduct an “ecological” evaluation of a question generation within the context of an external task involving document linking. To do this we conducted an “ecological” evaluation of a document linking tool in the context of the exploration of a Social Science archives and from this evaluation, we aim to derive a form of a “reference corpus” that can be used offline for the automated comparison of models and quantitative tool assessment. This corpus is available on the following link: https://gitlab.lis-lab.fr/archival-public/autogestion-qa-linking</abstract>
<identifier type="citekey">antoine-etal-2024-extrinsic</identifier>
<location>
<url>https://aclanthology.org/2024.humeval-1.6</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>63</start>
<end>70</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Extrinsic evaluation of question generation methods with user journey logs
%A Antoine, Elie
%A Besnehard, Eléonore
%A Bechet, Frederic
%A Damnati, Geraldine
%A Kergosien, Eric
%A Laborderie, Arnaud
%Y Balloccu, Simone
%Y Belz, Anya
%Y Huidrom, Rudali
%Y Reiter, Ehud
%Y Sedoc, Joao
%Y Thomson, Craig
%S Proceedings of the Fourth Workshop on Human Evaluation of NLP Systems (HumEval) @ LREC-COLING 2024
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F antoine-etal-2024-extrinsic
%X There is often a significant disparity between the performance of Natural Language Processing (NLP) tools as evaluated on benchmark datasets using metrics like ROUGE or BLEU, and the actual user experience encountered when employing these tools in real-world scenarios. This highlights the critical necessity for user-oriented studies aimed at evaluating user experience concerning the effectiveness of developed methodologies. A primary challenge in such “ecological” user studies is their assessment of specific configurations of NLP tools, making replication under identical conditions impractical. Consequently, their utility is limited for the automated evaluation and comparison of different configurations of the same tool. The objective of this study is to conduct an “ecological” evaluation of a question generation within the context of an external task involving document linking. To do this we conducted an “ecological” evaluation of a document linking tool in the context of the exploration of a Social Science archives and from this evaluation, we aim to derive a form of a “reference corpus” that can be used offline for the automated comparison of models and quantitative tool assessment. This corpus is available on the following link: https://gitlab.lis-lab.fr/archival-public/autogestion-qa-linking
%U https://aclanthology.org/2024.humeval-1.6
%P 63-70
Markdown (Informal)
[Extrinsic evaluation of question generation methods with user journey logs](https://aclanthology.org/2024.humeval-1.6) (Antoine et al., HumEval-WS 2024)
ACL