@inproceedings{chubarian-etal-2021-grouping,
title = "Grouping Words with Semantic Diversity",
author = "Chubarian, Karine and
Khan, Abdul Rafae and
Sidiropoulos, Anastasios and
Xu, Jia",
editor = "Toutanova, Kristina and
Rumshisky, Anna and
Zettlemoyer, Luke and
Hakkani-Tur, Dilek and
Beltagy, Iz and
Bethard, Steven and
Cotterell, Ryan and
Chakraborty, Tanmoy and
Zhou, Yichao",
booktitle = "Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies",
month = jun,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.naacl-main.257",
doi = "10.18653/v1/2021.naacl-main.257",
pages = "3217--3228",
abstract = "Deep Learning-based NLP systems can be sensitive to unseen tokens and hard to learn with high-dimensional inputs, which critically hinder learning generalization. We introduce an approach by grouping input words based on their semantic diversity to simplify input language representation with low ambiguity. Since the semantically diverse words reside in different contexts, we are able to substitute words with their groups and still distinguish word meanings relying on their contexts. We design several algorithms that compute diverse groupings based on random sampling, geometric distances, and entropy maximization, and we prove formal guarantees for the entropy-based algorithms. Experimental results show that our methods generalize NLP models and demonstrate enhanced accuracy on POS tagging and LM tasks and significant improvements on medium-scale machine translation tasks, up to +6.5 BLEU points. Our source code is available at \url{https://github.com/abdulrafae/dg}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chubarian-etal-2021-grouping">
<titleInfo>
<title>Grouping Words with Semantic Diversity</title>
</titleInfo>
<name type="personal">
<namePart type="given">Karine</namePart>
<namePart type="family">Chubarian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abdul</namePart>
<namePart type="given">Rafae</namePart>
<namePart type="family">Khan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anastasios</namePart>
<namePart type="family">Sidiropoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jia</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kristina</namePart>
<namePart type="family">Toutanova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rumshisky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luke</namePart>
<namePart type="family">Zettlemoyer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dilek</namePart>
<namePart type="family">Hakkani-Tur</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iz</namePart>
<namePart type="family">Beltagy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Cotterell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yichao</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Deep Learning-based NLP systems can be sensitive to unseen tokens and hard to learn with high-dimensional inputs, which critically hinder learning generalization. We introduce an approach by grouping input words based on their semantic diversity to simplify input language representation with low ambiguity. Since the semantically diverse words reside in different contexts, we are able to substitute words with their groups and still distinguish word meanings relying on their contexts. We design several algorithms that compute diverse groupings based on random sampling, geometric distances, and entropy maximization, and we prove formal guarantees for the entropy-based algorithms. Experimental results show that our methods generalize NLP models and demonstrate enhanced accuracy on POS tagging and LM tasks and significant improvements on medium-scale machine translation tasks, up to +6.5 BLEU points. Our source code is available at https://github.com/abdulrafae/dg.</abstract>
<identifier type="citekey">chubarian-etal-2021-grouping</identifier>
<identifier type="doi">10.18653/v1/2021.naacl-main.257</identifier>
<location>
<url>https://aclanthology.org/2021.naacl-main.257</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>3217</start>
<end>3228</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Grouping Words with Semantic Diversity
%A Chubarian, Karine
%A Khan, Abdul Rafae
%A Sidiropoulos, Anastasios
%A Xu, Jia
%Y Toutanova, Kristina
%Y Rumshisky, Anna
%Y Zettlemoyer, Luke
%Y Hakkani-Tur, Dilek
%Y Beltagy, Iz
%Y Bethard, Steven
%Y Cotterell, Ryan
%Y Chakraborty, Tanmoy
%Y Zhou, Yichao
%S Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F chubarian-etal-2021-grouping
%X Deep Learning-based NLP systems can be sensitive to unseen tokens and hard to learn with high-dimensional inputs, which critically hinder learning generalization. We introduce an approach by grouping input words based on their semantic diversity to simplify input language representation with low ambiguity. Since the semantically diverse words reside in different contexts, we are able to substitute words with their groups and still distinguish word meanings relying on their contexts. We design several algorithms that compute diverse groupings based on random sampling, geometric distances, and entropy maximization, and we prove formal guarantees for the entropy-based algorithms. Experimental results show that our methods generalize NLP models and demonstrate enhanced accuracy on POS tagging and LM tasks and significant improvements on medium-scale machine translation tasks, up to +6.5 BLEU points. Our source code is available at https://github.com/abdulrafae/dg.
%R 10.18653/v1/2021.naacl-main.257
%U https://aclanthology.org/2021.naacl-main.257
%U https://doi.org/10.18653/v1/2021.naacl-main.257
%P 3217-3228
Markdown (Informal)
[Grouping Words with Semantic Diversity](https://aclanthology.org/2021.naacl-main.257) (Chubarian et al., NAACL 2021)
ACL
- Karine Chubarian, Abdul Rafae Khan, Anastasios Sidiropoulos, and Jia Xu. 2021. Grouping Words with Semantic Diversity. In Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pages 3217–3228, Online. Association for Computational Linguistics.