@inproceedings{sofi-etal-2022-robustness,
title = "A Robustness Evaluation Framework for Argument Mining",
author = "Sofi, Mehmet and
Fortier, Matteo and
Cocarascu, Oana",
editor = "Lapesa, Gabriella and
Schneider, Jodi and
Jo, Yohan and
Saha, Sougata",
booktitle = "Proceedings of the 9th Workshop on Argument Mining",
month = oct,
year = "2022",
address = "Online and in Gyeongju, Republic of Korea",
publisher = "International Conference on Computational Linguistics",
url = "https://aclanthology.org/2022.argmining-1.16/",
pages = "171--180",
abstract = "Standard practice for evaluating the performance of machine learning models for argument mining is to report different metrics such as accuracy or F1. However, little is usually known about the model's stability and consistency when deployed in real-world settings. In this paper, we propose a robustness evaluation framework to guide the design of rigorous argument mining models. As part of the framework, we introduce several novel robustness tests tailored specifically to argument mining tasks. Additionally, we integrate existing robustness tests designed for other natural language processing tasks and re-purpose them for argument mining. Finally, we illustrate the utility of our framework on two widely used argument mining corpora, UKP topic-sentences and IBM Debater Evidence Sentence. We argue that our framework should be used in conjunction with standard performance evaluation techniques as a measure of model stability."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sofi-etal-2022-robustness">
<titleInfo>
<title>A Robustness Evaluation Framework for Argument Mining</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mehmet</namePart>
<namePart type="family">Sofi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matteo</namePart>
<namePart type="family">Fortier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oana</namePart>
<namePart type="family">Cocarascu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 9th Workshop on Argument Mining</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gabriella</namePart>
<namePart type="family">Lapesa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jodi</namePart>
<namePart type="family">Schneider</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yohan</namePart>
<namePart type="family">Jo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sougata</namePart>
<namePart type="family">Saha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Conference on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online and in Gyeongju, Republic of Korea</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Standard practice for evaluating the performance of machine learning models for argument mining is to report different metrics such as accuracy or F1. However, little is usually known about the model's stability and consistency when deployed in real-world settings. In this paper, we propose a robustness evaluation framework to guide the design of rigorous argument mining models. As part of the framework, we introduce several novel robustness tests tailored specifically to argument mining tasks. Additionally, we integrate existing robustness tests designed for other natural language processing tasks and re-purpose them for argument mining. Finally, we illustrate the utility of our framework on two widely used argument mining corpora, UKP topic-sentences and IBM Debater Evidence Sentence. We argue that our framework should be used in conjunction with standard performance evaluation techniques as a measure of model stability.</abstract>
<identifier type="citekey">sofi-etal-2022-robustness</identifier>
<location>
<url>https://aclanthology.org/2022.argmining-1.16/</url>
</location>
<part>
<date>2022-10</date>
<extent unit="page">
<start>171</start>
<end>180</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Robustness Evaluation Framework for Argument Mining
%A Sofi, Mehmet
%A Fortier, Matteo
%A Cocarascu, Oana
%Y Lapesa, Gabriella
%Y Schneider, Jodi
%Y Jo, Yohan
%Y Saha, Sougata
%S Proceedings of the 9th Workshop on Argument Mining
%D 2022
%8 October
%I International Conference on Computational Linguistics
%C Online and in Gyeongju, Republic of Korea
%F sofi-etal-2022-robustness
%X Standard practice for evaluating the performance of machine learning models for argument mining is to report different metrics such as accuracy or F1. However, little is usually known about the model's stability and consistency when deployed in real-world settings. In this paper, we propose a robustness evaluation framework to guide the design of rigorous argument mining models. As part of the framework, we introduce several novel robustness tests tailored specifically to argument mining tasks. Additionally, we integrate existing robustness tests designed for other natural language processing tasks and re-purpose them for argument mining. Finally, we illustrate the utility of our framework on two widely used argument mining corpora, UKP topic-sentences and IBM Debater Evidence Sentence. We argue that our framework should be used in conjunction with standard performance evaluation techniques as a measure of model stability.
%U https://aclanthology.org/2022.argmining-1.16/
%P 171-180
Markdown (Informal)
[A Robustness Evaluation Framework for Argument Mining](https://aclanthology.org/2022.argmining-1.16/) (Sofi et al., ArgMining 2022)
ACL
- Mehmet Sofi, Matteo Fortier, and Oana Cocarascu. 2022. A Robustness Evaluation Framework for Argument Mining. In Proceedings of the 9th Workshop on Argument Mining, pages 171–180, Online and in Gyeongju, Republic of Korea. International Conference on Computational Linguistics.