@article{kim-etal-2019-categorical,
title = "Categorical Metadata Representation for Customized Text Classification",
author = "Kim, Jihyeok and
Amplayo, Reinald Kim and
Lee, Kyungjae and
Sung, Sua and
Seo, Minji and
Hwang, Seung-won",
editor = "Lee, Lillian and
Johnson, Mark and
Roark, Brian and
Nenkova, Ani",
journal = "Transactions of the Association for Computational Linguistics",
volume = "7",
year = "2019",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/Q19-1013",
doi = "10.1162/tacl_a_00263",
pages = "201--215",
abstract = "The performance of text classification has improved tremendously using intelligently engineered neural-based models, especially those injecting categorical metadata as additional information, e.g., using user/product information for sentiment classification. This information has been used to modify parts of the model (e.g., word embeddings, attention mechanisms) such that results can be customized according to the metadata. We observe that current representation methods for categorical metadata, which are devised for human consumption, are not as effective as claimed in popular classification methods, outperformed even by simple concatenation of categorical features in the final layer of the sentence encoder. We conjecture that categorical features are harder to represent for machine use, as available context only indirectly describes the category, and even such context is often scarce (for tail category). To this end, we propose using basis vectors to effectively incorporate categorical metadata on various parts of a neural-based model. This additionally decreases the number of parameters dramatically, especially when the number of categorical features is large. Extensive experiments on various data sets with different properties are performed and show that through our method, we can represent categorical metadata more effectively to customize parts of the model, including unexplored ones, and increase the performance of the model greatly.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kim-etal-2019-categorical">
<titleInfo>
<title>Categorical Metadata Representation for Customized Text Classification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jihyeok</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reinald</namePart>
<namePart type="given">Kim</namePart>
<namePart type="family">Amplayo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kyungjae</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sua</namePart>
<namePart type="family">Sung</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Minji</namePart>
<namePart type="family">Seo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seung-won</namePart>
<namePart type="family">Hwang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>The performance of text classification has improved tremendously using intelligently engineered neural-based models, especially those injecting categorical metadata as additional information, e.g., using user/product information for sentiment classification. This information has been used to modify parts of the model (e.g., word embeddings, attention mechanisms) such that results can be customized according to the metadata. We observe that current representation methods for categorical metadata, which are devised for human consumption, are not as effective as claimed in popular classification methods, outperformed even by simple concatenation of categorical features in the final layer of the sentence encoder. We conjecture that categorical features are harder to represent for machine use, as available context only indirectly describes the category, and even such context is often scarce (for tail category). To this end, we propose using basis vectors to effectively incorporate categorical metadata on various parts of a neural-based model. This additionally decreases the number of parameters dramatically, especially when the number of categorical features is large. Extensive experiments on various data sets with different properties are performed and show that through our method, we can represent categorical metadata more effectively to customize parts of the model, including unexplored ones, and increase the performance of the model greatly.</abstract>
<identifier type="citekey">kim-etal-2019-categorical</identifier>
<identifier type="doi">10.1162/tacl_a_00263</identifier>
<location>
<url>https://aclanthology.org/Q19-1013</url>
</location>
<part>
<date>2019</date>
<detail type="volume"><number>7</number></detail>
<extent unit="page">
<start>201</start>
<end>215</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Categorical Metadata Representation for Customized Text Classification
%A Kim, Jihyeok
%A Amplayo, Reinald Kim
%A Lee, Kyungjae
%A Sung, Sua
%A Seo, Minji
%A Hwang, Seung-won
%J Transactions of the Association for Computational Linguistics
%D 2019
%V 7
%I MIT Press
%C Cambridge, MA
%F kim-etal-2019-categorical
%X The performance of text classification has improved tremendously using intelligently engineered neural-based models, especially those injecting categorical metadata as additional information, e.g., using user/product information for sentiment classification. This information has been used to modify parts of the model (e.g., word embeddings, attention mechanisms) such that results can be customized according to the metadata. We observe that current representation methods for categorical metadata, which are devised for human consumption, are not as effective as claimed in popular classification methods, outperformed even by simple concatenation of categorical features in the final layer of the sentence encoder. We conjecture that categorical features are harder to represent for machine use, as available context only indirectly describes the category, and even such context is often scarce (for tail category). To this end, we propose using basis vectors to effectively incorporate categorical metadata on various parts of a neural-based model. This additionally decreases the number of parameters dramatically, especially when the number of categorical features is large. Extensive experiments on various data sets with different properties are performed and show that through our method, we can represent categorical metadata more effectively to customize parts of the model, including unexplored ones, and increase the performance of the model greatly.
%R 10.1162/tacl_a_00263
%U https://aclanthology.org/Q19-1013
%U https://doi.org/10.1162/tacl_a_00263
%P 201-215
Markdown (Informal)
[Categorical Metadata Representation for Customized Text Classification](https://aclanthology.org/Q19-1013) (Kim et al., TACL 2019)
ACL