@inproceedings{wang-banko-2021-practical,
title = "Practical Transformer-based Multilingual Text Classification",
author = "Wang, Cindy and
Banko, Michele",
editor = "Kim, Young-bum and
Li, Yunyao and
Rambow, Owen",
booktitle = "Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers",
month = jun,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.naacl-industry.16",
doi = "10.18653/v1/2021.naacl-industry.16",
pages = "121--129",
abstract = "Transformer-based methods are appealing for multilingual text classification, but common research benchmarks like XNLI (Conneau et al., 2018) do not reflect the data availability and task variety of industry applications. We present an empirical comparison of transformer-based text classification models in a variety of practical monolingual and multilingual pretraining and fine-tuning settings. We evaluate these methods on two distinct tasks in five different languages. Departing from prior work, our results show that multilingual language models can outperform monolingual ones in some downstream tasks and target languages. We additionally show that practical modifications such as task- and domain-adaptive pretraining and data augmentation can improve classification performance without the need for additional labeled data.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wang-banko-2021-practical">
<titleInfo>
<title>Practical Transformer-based Multilingual Text Classification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Cindy</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michele</namePart>
<namePart type="family">Banko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Young-bum</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yunyao</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Transformer-based methods are appealing for multilingual text classification, but common research benchmarks like XNLI (Conneau et al., 2018) do not reflect the data availability and task variety of industry applications. We present an empirical comparison of transformer-based text classification models in a variety of practical monolingual and multilingual pretraining and fine-tuning settings. We evaluate these methods on two distinct tasks in five different languages. Departing from prior work, our results show that multilingual language models can outperform monolingual ones in some downstream tasks and target languages. We additionally show that practical modifications such as task- and domain-adaptive pretraining and data augmentation can improve classification performance without the need for additional labeled data.</abstract>
<identifier type="citekey">wang-banko-2021-practical</identifier>
<identifier type="doi">10.18653/v1/2021.naacl-industry.16</identifier>
<location>
<url>https://aclanthology.org/2021.naacl-industry.16</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>121</start>
<end>129</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Practical Transformer-based Multilingual Text Classification
%A Wang, Cindy
%A Banko, Michele
%Y Kim, Young-bum
%Y Li, Yunyao
%Y Rambow, Owen
%S Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F wang-banko-2021-practical
%X Transformer-based methods are appealing for multilingual text classification, but common research benchmarks like XNLI (Conneau et al., 2018) do not reflect the data availability and task variety of industry applications. We present an empirical comparison of transformer-based text classification models in a variety of practical monolingual and multilingual pretraining and fine-tuning settings. We evaluate these methods on two distinct tasks in five different languages. Departing from prior work, our results show that multilingual language models can outperform monolingual ones in some downstream tasks and target languages. We additionally show that practical modifications such as task- and domain-adaptive pretraining and data augmentation can improve classification performance without the need for additional labeled data.
%R 10.18653/v1/2021.naacl-industry.16
%U https://aclanthology.org/2021.naacl-industry.16
%U https://doi.org/10.18653/v1/2021.naacl-industry.16
%P 121-129
Markdown (Informal)

[Practical Transformer-based Multilingual Text Classification](https://aclanthology.org/2021.naacl-industry.16) (Wang & Banko, NAACL 2021)

ACL

Cindy Wang and Michele Banko. 2021. Practical Transformer-based Multilingual Text Classification. In Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers, pages 121–129, Online. Association for Computational Linguistics.