@article{pappas-henderson-2019-gile,
title = "{GILE}: A Generalized Input-Label Embedding for Text Classification",
author = "Pappas, Nikolaos and
Henderson, James",
editor = "Lee, Lillian and
Johnson, Mark and
Roark, Brian and
Nenkova, Ani",
journal = "Transactions of the Association for Computational Linguistics",
volume = "7",
year = "2019",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/Q19-1009",
doi = "10.1162/tacl_a_00259",
pages = "139--155",
abstract = "Neural text classification models typically treat output labels as categorical variables that lack description and semantics. This forces their parametrization to be dependent on the label set size, and, hence, they are unable to scale to large label sets and generalize to unseen ones. Existing joint input-label text models overcome these issues by exploiting label descriptions, but they are unable to capture complex label relationships, have rigid parametrization, and their gains on unseen labels happen often at the expense of weak performance on the labels seen during training. In this paper, we propose a new input-label model that generalizes over previous such models, addresses their limitations, and does not compromise performance on seen labels. The model consists of a joint nonlinear input-label embedding with controllable capacity and a joint-space-dependent classification unit that is trained with cross-entropy loss to optimize classification performance. We evaluate models on full-resource and low- or zero-resource text classification of multilingual news and biomedical text with a large label set. Our model outperforms monolingual and multilingual models that do not leverage label semantics and previous joint input-label space models in both scenarios.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="pappas-henderson-2019-gile">
    <titleInfo>
      <title>GILE: A Generalized Input-Label Embedding for Text Classification</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Nikolaos</namePart>
      <namePart type="family">Pappas</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">James</namePart>
      <namePart type="family">Henderson</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <genre authority="bibutilsgt">journal article</genre>
    <relatedItem type="host">
      <titleInfo>
        <title>Transactions of the Association for Computational Linguistics</title>
      </titleInfo>
      <originInfo>
        <issuance>continuing</issuance>
        <publisher>MIT Press</publisher>
        <place>
          <placeTerm type="text">Cambridge, MA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">periodical</genre>
      <genre authority="bibutilsgt">academic journal</genre>
    </relatedItem>
    <abstract>Neural text classification models typically treat output labels as categorical variables that lack description and semantics. This forces their parametrization to be dependent on the label set size, and, hence, they are unable to scale to large label sets and generalize to unseen ones. Existing joint input-label text models overcome these issues by exploiting label descriptions, but they are unable to capture complex label relationships, have rigid parametrization, and their gains on unseen labels happen often at the expense of weak performance on the labels seen during training. In this paper, we propose a new input-label model that generalizes over previous such models, addresses their limitations, and does not compromise performance on seen labels. The model consists of a joint nonlinear input-label embedding with controllable capacity and a joint-space-dependent classification unit that is trained with cross-entropy loss to optimize classification performance. We evaluate models on full-resource and low- or zero-resource text classification of multilingual news and biomedical text with a large label set. Our model outperforms monolingual and multilingual models that do not leverage label semantics and previous joint input-label space models in both scenarios.</abstract>
    <identifier type="citekey">pappas-henderson-2019-gile</identifier>
    <identifier type="doi">10.1162/tacl_a_00259</identifier>
    <location>
      <url>https://aclanthology.org/Q19-1009</url>
    </location>
    <part>
      <date>2019</date>
      <detail type="volume"><number>7</number></detail>
      <extent unit="page">
        <start>139</start>
        <end>155</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Journal Article
%T GILE: A Generalized Input-Label Embedding for Text Classification
%A Pappas, Nikolaos
%A Henderson, James
%J Transactions of the Association for Computational Linguistics
%D 2019
%V 7
%I MIT Press
%C Cambridge, MA
%F pappas-henderson-2019-gile
%X Neural text classification models typically treat output labels as categorical variables that lack description and semantics. This forces their parametrization to be dependent on the label set size, and, hence, they are unable to scale to large label sets and generalize to unseen ones. Existing joint input-label text models overcome these issues by exploiting label descriptions, but they are unable to capture complex label relationships, have rigid parametrization, and their gains on unseen labels happen often at the expense of weak performance on the labels seen during training. In this paper, we propose a new input-label model that generalizes over previous such models, addresses their limitations, and does not compromise performance on seen labels. The model consists of a joint nonlinear input-label embedding with controllable capacity and a joint-space-dependent classification unit that is trained with cross-entropy loss to optimize classification performance. We evaluate models on full-resource and low- or zero-resource text classification of multilingual news and biomedical text with a large label set. Our model outperforms monolingual and multilingual models that do not leverage label semantics and previous joint input-label space models in both scenarios.
%R 10.1162/tacl_a_00259
%U https://aclanthology.org/Q19-1009
%U https://doi.org/10.1162/tacl_a_00259
%P 139-155
Markdown (Informal)
[GILE: A Generalized Input-Label Embedding for Text Classification](https://aclanthology.org/Q19-1009) (Pappas & Henderson, TACL 2019)
ACL
Nikolaos Pappas and James Henderson. 2019. GILE: A Generalized Input-Label Embedding for Text Classification. Transactions of the Association for Computational Linguistics, 7:139–155.
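The abstract above describes the model at the level of architecture: label descriptions are embedded, projected together with the input encoding into a joint nonlinear space of controllable capacity, and scored by a joint-space-dependent classification unit trained with cross-entropy. The sketch below is a minimal, hedged illustration of that idea in plain NumPy; the dimensions, the `tanh` nonlinearity, and the multiplicative combination in the joint space are assumptions made for illustration, not the paper's exact formulation.

```python
# Illustrative sketch of a joint input-label scorer in the spirit of the
# abstract (NOT the authors' code): input encodings and label-description
# embeddings are projected nonlinearly into a shared space whose size
# d_joint acts as the "controllable capacity", then scored by a single
# classification unit. Because labels enter through their embeddings rather
# than fixed output indices, the parameter count is independent of the label
# set size, and embeddings of unseen labels can be scored at test time.
import numpy as np

rng = np.random.default_rng(0)

d_in, d_label, d_joint = 128, 300, 64  # encoder dim, label-embedding dim, joint capacity (assumed)

U = rng.normal(scale=0.1, size=(d_joint, d_in))     # projects the input encoding
V = rng.normal(scale=0.1, size=(d_joint, d_label))  # projects the label embedding
w = rng.normal(scale=0.1, size=d_joint)             # joint-space classification unit
b = 0.0

def score(h, e):
    """Probability that input encoding h carries the label embedded as e."""
    joint = np.tanh(U @ h) * np.tanh(V @ e)        # joint nonlinear space (combination assumed multiplicative)
    return 1.0 / (1.0 + np.exp(-(w @ joint + b)))  # sigmoid; train with cross-entropy per label

h = rng.normal(size=d_in)               # stand-in for a document encoding
labels = rng.normal(size=(5, d_label))  # stand-in label-description embeddings
print(np.array([score(h, e) for e in labels]))  # one probability per label, seen or unseen
```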