% Conference paper record (EACL 2017, Volume 1: Long Papers); ACL Anthology URL ends in E17-1093.
% Text outside an entry is ignored by BibTeX, so these two lines act as comments.
@inproceedings{sato-etal-2017-distributed,
    title = {Distributed Document and Phrase Co-embeddings for Descriptive Clustering},
    author = {Sato, Motoki  and
      Brockmeier, Austin J.  and
      Kontonatsios, Georgios  and
      Mu, Tingting  and
      Goulermas, John Y.  and
      Tsujii, Jun{'}ichi  and
      Ananiadou, Sophia},
    editor = {Lapata, Mirella  and
      Blunsom, Phil  and
      Koller, Alexander},
    booktitle = {Proceedings of the 15th Conference of the {European} Chapter of the Association for Computational Linguistics: Volume 1, Long Papers},
    month = apr,
    year = {2017},
    address = {Valencia, Spain},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/E17-1093/},
    pages = {991--1001},
    abstract = {Descriptive document clustering aims to automatically discover groups of semantically related documents and to assign a meaningful label to characterise the content of each cluster. In this paper, we present a descriptive clustering approach that employs a distributed representation model, namely the paragraph vector model, to capture semantic similarities between documents and phrases. The proposed method uses a joint representation of phrases and documents (i.e., a co-embedding) to automatically select a descriptive phrase that best represents each document cluster. We evaluate our method by comparing its performance to an existing state-of-the-art descriptive clustering method that also uses co-embedding but relies on a bag-of-words representation. Results obtained on benchmark datasets demonstrate that the paragraph vector-based method obtains superior performance over the existing approach in both identifying clusters and assigning appropriate descriptive labels to them.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<!-- MODS v3 record for a single conference paper; the mods ID below matches the citekey identifier near the end of the record. -->
<mods ID="sato-etal-2017-distributed">
    <titleInfo>
        <title>Distributed Document and Phrase Co-embeddings for Descriptive Clustering</title>
    </titleInfo>
    <!-- Authors: one personal name element per author; a middle initial is stored as a second given-name part. -->
    <name type="personal">
        <namePart type="given">Motoki</namePart>
        <namePart type="family">Sato</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Austin</namePart>
        <namePart type="given">J</namePart>
        <namePart type="family">Brockmeier</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Georgios</namePart>
        <namePart type="family">Kontonatsios</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Tingting</namePart>
        <namePart type="family">Mu</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">John</namePart>
        <namePart type="given">Y</namePart>
        <namePart type="family">Goulermas</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Jun’ichi</namePart>
        <namePart type="family">Tsujii</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Sophia</namePart>
        <namePart type="family">Ananiadou</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <!-- Issue date of the paper, given at year-month granularity. -->
    <originInfo>
        <dateIssued>2017-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume containing this paper, with its editors, publisher and place. -->
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 1, Long Papers</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Mirella</namePart>
            <namePart type="family">Lapata</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Phil</namePart>
            <namePart type="family">Blunsom</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Alexander</namePart>
            <namePart type="family">Koller</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">Valencia, Spain</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Descriptive document clustering aims to automatically discover groups of semantically related documents and to assign a meaningful label to characterise the content of each cluster. In this paper, we present a descriptive clustering approach that employs a distributed representation model, namely the paragraph vector model, to capture semantic similarities between documents and phrases. The proposed method uses a joint representation of phrases and documents (i.e., a co-embedding) to automatically select a descriptive phrase that best represents each document cluster. We evaluate our method by comparing its performance to an existing state-of-the-art descriptive clustering method that also uses co-embedding but relies on a bag-of-words representation. Results obtained on benchmark datasets demonstrate that the paragraph vector-based method obtains superior performance over the existing approach in both identifying clusters and assigning appropriate descriptive labels to them.</abstract>
    <!-- Citation key; same value as the ID attribute on the mods element. -->
    <identifier type="citekey">sato-etal-2017-distributed</identifier>
    <location>
        <url>https://aclanthology.org/E17-1093/</url>
    </location>
    <!-- Position within the host item: date and first/last page. -->
    <part>
        <date>2017-04</date>
        <extent unit="page">
            <start>991</start>
            <end>1001</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Distributed Document and Phrase Co-embeddings for Descriptive Clustering
%A Sato, Motoki
%A Brockmeier, Austin J.
%A Kontonatsios, Georgios
%A Mu, Tingting
%A Goulermas, John Y.
%A Tsujii, Jun’ichi
%A Ananiadou, Sophia
%Y Lapata, Mirella
%Y Blunsom, Phil
%Y Koller, Alexander
%S Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 1, Long Papers
%D 2017
%8 April
%I Association for Computational Linguistics
%C Valencia, Spain
%F sato-etal-2017-distributed
%X Descriptive document clustering aims to automatically discover groups of semantically related documents and to assign a meaningful label to characterise the content of each cluster. In this paper, we present a descriptive clustering approach that employs a distributed representation model, namely the paragraph vector model, to capture semantic similarities between documents and phrases. The proposed method uses a joint representation of phrases and documents (i.e., a co-embedding) to automatically select a descriptive phrase that best represents each document cluster. We evaluate our method by comparing its performance to an existing state-of-the-art descriptive clustering method that also uses co-embedding but relies on a bag-of-words representation. Results obtained on benchmark datasets demonstrate that the paragraph vector-based method obtains superior performance over the existing approach in both identifying clusters and assigning appropriate descriptive labels to them.
%U https://aclanthology.org/E17-1093/
%P 991-1001
Markdown (Informal)
[Distributed Document and Phrase Co-embeddings for Descriptive Clustering](https://aclanthology.org/E17-1093/) (Sato et al., EACL 2017)
ACL
- Motoki Sato, Austin J. Brockmeier, Georgios Kontonatsios, Tingting Mu, John Y. Goulermas, Jun’ichi Tsujii, and Sophia Ananiadou. 2017. Distributed Document and Phrase Co-embeddings for Descriptive Clustering. In Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 1, Long Papers, pages 991–1001, Valencia, Spain. Association for Computational Linguistics.