@inproceedings{han-etal-2022-towards-fair,
title = "Towards Fair Dataset Distillation for Text Classification",
author = "Han, Xudong and
Shen, Aili and
Li, Yitong and
Frermann, Lea and
Baldwin, Timothy and
Cohn, Trevor",
editor = {Fan, Angela and
Gurevych, Iryna and
Hou, Yufang and
Kozareva, Zornitsa and
Luccioni, Sasha and
Sadat Moosavi, Nafise and
Ravi, Sujith and
Kim, Gyuwan and
Schwartz, Roy and
R{\"u}ckl{\'e}, Andreas},
booktitle = "Proceedings of the Third Workshop on Simple and Efficient Natural Language Processing (SustaiNLP)",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.sustainlp-1.13/",
doi = "10.18653/v1/2022.sustainlp-1.13",
pages = "65--72",
abstract = "With the growing prevalence of large-scale language models, their energy footprint and potential to learn and amplify historical biases are two pressing challenges. Dataset distillation (DD) {---} a method for reducing the dataset size by learning a small number of synthetic samples which encode the information in the original dataset {---} is a method for reducing the cost of model training, however its impact on fairness has not been studied. We investigate how DD impacts on group bias, with experiments over two language classification tasks, concluding that vanilla DD preserves the bias of the dataset. We then show how existing debiasing methods can be combined with DD to produce models that are fair and accurate, at reduced training cost."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="han-etal-2022-towards-fair">
    <titleInfo>
      <title>Towards Fair Dataset Distillation for Text Classification</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Xudong</namePart>
      <namePart type="family">Han</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Aili</namePart>
      <namePart type="family">Shen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yitong</namePart>
      <namePart type="family">Li</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Lea</namePart>
      <namePart type="family">Frermann</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Timothy</namePart>
      <namePart type="family">Baldwin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Trevor</namePart>
      <namePart type="family">Cohn</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Third Workshop on Simple and Efficient Natural Language Processing (SustaiNLP)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Angela</namePart>
        <namePart type="family">Fan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Iryna</namePart>
        <namePart type="family">Gurevych</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yufang</namePart>
        <namePart type="family">Hou</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Zornitsa</namePart>
        <namePart type="family">Kozareva</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sasha</namePart>
        <namePart type="family">Luccioni</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nafise</namePart>
        <namePart type="family">Sadat Moosavi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sujith</namePart>
        <namePart type="family">Ravi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Gyuwan</namePart>
        <namePart type="family">Kim</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Roy</namePart>
        <namePart type="family">Schwartz</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Andreas</namePart>
        <namePart type="family">Rücklé</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Abu Dhabi, United Arab Emirates (Hybrid)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>With the growing prevalence of large-scale language models, their energy footprint and potential to learn and amplify historical biases are two pressing challenges. Dataset distillation (DD) — a method for reducing the dataset size by learning a small number of synthetic samples which encode the information in the original dataset — is a method for reducing the cost of model training, however its impact on fairness has not been studied. We investigate how DD impacts on group bias, with experiments over two language classification tasks, concluding that vanilla DD preserves the bias of the dataset. We then show how existing debiasing methods can be combined with DD to produce models that are fair and accurate, at reduced training cost.</abstract>
    <identifier type="citekey">han-etal-2022-towards-fair</identifier>
    <identifier type="doi">10.18653/v1/2022.sustainlp-1.13</identifier>
    <location>
      <url>https://aclanthology.org/2022.sustainlp-1.13/</url>
    </location>
    <part>
      <date>2022-12</date>
      <extent unit="page">
        <start>65</start>
        <end>72</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Fair Dataset Distillation for Text Classification
%A Han, Xudong
%A Shen, Aili
%A Li, Yitong
%A Frermann, Lea
%A Baldwin, Timothy
%A Cohn, Trevor
%Y Fan, Angela
%Y Gurevych, Iryna
%Y Hou, Yufang
%Y Kozareva, Zornitsa
%Y Luccioni, Sasha
%Y Sadat Moosavi, Nafise
%Y Ravi, Sujith
%Y Kim, Gyuwan
%Y Schwartz, Roy
%Y Rücklé, Andreas
%S Proceedings of the Third Workshop on Simple and Efficient Natural Language Processing (SustaiNLP)
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates (Hybrid)
%F han-etal-2022-towards-fair
%X With the growing prevalence of large-scale language models, their energy footprint and potential to learn and amplify historical biases are two pressing challenges. Dataset distillation (DD) — a method for reducing the dataset size by learning a small number of synthetic samples which encode the information in the original dataset — is a method for reducing the cost of model training, however its impact on fairness has not been studied. We investigate how DD impacts on group bias, with experiments over two language classification tasks, concluding that vanilla DD preserves the bias of the dataset. We then show how existing debiasing methods can be combined with DD to produce models that are fair and accurate, at reduced training cost.
%R 10.18653/v1/2022.sustainlp-1.13
%U https://aclanthology.org/2022.sustainlp-1.13/
%U https://doi.org/10.18653/v1/2022.sustainlp-1.13
%P 65-72
Markdown (Informal)
[Towards Fair Dataset Distillation for Text Classification](https://aclanthology.org/2022.sustainlp-1.13/) (Han et al., sustainlp 2022)
ACL
Xudong Han, Aili Shen, Yitong Li, Lea Frermann, Timothy Baldwin, and Trevor Cohn. 2022. Towards Fair Dataset Distillation for Text Classification. In Proceedings of the Third Workshop on Simple and Efficient Natural Language Processing (SustaiNLP), pages 65–72, Abu Dhabi, United Arab Emirates (Hybrid). Association for Computational Linguistics.