@inproceedings{vacareanu-etal-2020-unsupervised,
title = "An Unsupervised Method for Learning Representations of Multi-word Expressions for Semantic Classification",
author = "Vacareanu, Robert and
Valenzuela-Esc{\'a}rcega, Marco A. and
Sharp, Rebecca and
Surdeanu, Mihai",
editor = "Scott, Donia and
Bel, Nuria and
Zong, Chengqing",
booktitle = "Proceedings of the 28th International Conference on Computational Linguistics",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2020.coling-main.297",
doi = "10.18653/v1/2020.coling-main.297",
pages = "3346--3356",
abstract = "This paper explores an unsupervised approach to learning a compositional representation function for multi-word expressions (MWEs), and evaluates it on the Tratz dataset, which associates two-word expressions with the semantic relation between the compound constituents (e.g. the label employer is associated with the noun compound government agency) (Tratz, 2011). The composition function is based on recurrent neural networks, and is trained using the Skip-Gram objective to predict the words in the context of MWEs. Thus our approach can naturally leverage large unlabeled text sources. Further, our method can make use of provided MWEs when available, but can also function as a completely unsupervised algorithm, using MWE boundaries predicted by a single, domain-agnostic part-of-speech pattern. With pre-defined MWE boundaries, our method outperforms the previous state-of-the-art performance on the coarse-grained evaluation of the Tratz dataset (Tratz, 2011), with an F1 score of 50.4{\%}. The unsupervised version of our method approaches the performance of the supervised one, and even outperforms it in some configurations.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="vacareanu-etal-2020-unsupervised">
<titleInfo>
<title>An Unsupervised Method for Learning Representations of Multi-word Expressions for Semantic Classification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Robert</namePart>
<namePart type="family">Vacareanu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Valenzuela-Escárcega</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rebecca</namePart>
<namePart type="family">Sharp</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mihai</namePart>
<namePart type="family">Surdeanu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 28th International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Donia</namePart>
<namePart type="family">Scott</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nuria</namePart>
<namePart type="family">Bel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chengqing</namePart>
<namePart type="family">Zong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper explores an unsupervised approach to learning a compositional representation function for multi-word expressions (MWEs), and evaluates it on the Tratz dataset, which associates two-word expressions with the semantic relation between the compound constituents (e.g. the label employer is associated with the noun compound government agency) (Tratz, 2011). The composition function is based on recurrent neural networks, and is trained using the Skip-Gram objective to predict the words in the context of MWEs. Thus our approach can naturally leverage large unlabeled text sources. Further, our method can make use of provided MWEs when available, but can also function as a completely unsupervised algorithm, using MWE boundaries predicted by a single, domain-agnostic part-of-speech pattern. With pre-defined MWE boundaries, our method outperforms the previous state-of-the-art performance on the coarse-grained evaluation of the Tratz dataset (Tratz, 2011), with an F1 score of 50.4%. The unsupervised version of our method approaches the performance of the supervised one, and even outperforms it in some configurations.</abstract>
<identifier type="citekey">vacareanu-etal-2020-unsupervised</identifier>
<identifier type="doi">10.18653/v1/2020.coling-main.297</identifier>
<location>
<url>https://aclanthology.org/2020.coling-main.297</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>3346</start>
<end>3356</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T An Unsupervised Method for Learning Representations of Multi-word Expressions for Semantic Classification
%A Vacareanu, Robert
%A Valenzuela-Escárcega, Marco A.
%A Sharp, Rebecca
%A Surdeanu, Mihai
%Y Scott, Donia
%Y Bel, Nuria
%Y Zong, Chengqing
%S Proceedings of the 28th International Conference on Computational Linguistics
%D 2020
%8 December
%I International Committee on Computational Linguistics
%C Barcelona, Spain (Online)
%F vacareanu-etal-2020-unsupervised
%X This paper explores an unsupervised approach to learning a compositional representation function for multi-word expressions (MWEs), and evaluates it on the Tratz dataset, which associates two-word expressions with the semantic relation between the compound constituents (e.g. the label employer is associated with the noun compound government agency) (Tratz, 2011). The composition function is based on recurrent neural networks, and is trained using the Skip-Gram objective to predict the words in the context of MWEs. Thus our approach can naturally leverage large unlabeled text sources. Further, our method can make use of provided MWEs when available, but can also function as a completely unsupervised algorithm, using MWE boundaries predicted by a single, domain-agnostic part-of-speech pattern. With pre-defined MWE boundaries, our method outperforms the previous state-of-the-art performance on the coarse-grained evaluation of the Tratz dataset (Tratz, 2011), with an F1 score of 50.4%. The unsupervised version of our method approaches the performance of the supervised one, and even outperforms it in some configurations.
%R 10.18653/v1/2020.coling-main.297
%U https://aclanthology.org/2020.coling-main.297
%U https://doi.org/10.18653/v1/2020.coling-main.297
%P 3346-3356
Markdown (Informal)
[An Unsupervised Method for Learning Representations of Multi-word Expressions for Semantic Classification](https://aclanthology.org/2020.coling-main.297) (Vacareanu et al., COLING 2020)
ACL