@inproceedings{miranda-etal-2018-multilingual,
title = "Multilingual Clustering of Streaming News",
author = "Miranda, Sebasti{\~a}o and
Znoti{\c{n}}{\v{s}}, Art{\=u}rs and
Cohen, Shay B. and
Barzdins, Guntis",
editor = "Riloff, Ellen and
Chiang, David and
Hockenmaier, Julia and
Tsujii, Jun{'}ichi",
booktitle = "Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing",
month = oct # "-" # nov,
year = "2018",
address = "Brussels, Belgium",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D18-1483",
doi = "10.18653/v1/D18-1483",
pages = "4535--4544",
abstract = "Clustering news across languages enables efficient media monitoring by aggregating articles from multilingual sources into coherent stories. Doing so in an online setting allows scalable processing of massive news streams. To this end, we describe a novel method for clustering an incoming stream of multilingual documents into monolingual and crosslingual clusters. Unlike typical clustering approaches that report results on datasets with a small and known number of labels, we tackle the problem of discovering an ever growing number of cluster labels in an online fashion, using real news datasets in multiple languages. In our formulation, the monolingual clusters group together documents while the crosslingual clusters group together monolingual clusters, one per language that appears in the stream. Our method is simple to implement, computationally efficient and produces state-of-the-art results on datasets in German, English and Spanish.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="miranda-etal-2018-multilingual">
<titleInfo>
<title>Multilingual Clustering of Streaming News</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sebastião</namePart>
<namePart type="family">Miranda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Artūrs</namePart>
<namePart type="family">Znotiņš</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shay</namePart>
<namePart type="given">B</namePart>
<namePart type="family">Cohen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guntis</namePart>
<namePart type="family">Barzdins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-oct-nov</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ellen</namePart>
<namePart type="family">Riloff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Chiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Hockenmaier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jun’ichi</namePart>
<namePart type="family">Tsujii</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Brussels, Belgium</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Clustering news across languages enables efficient media monitoring by aggregating articles from multilingual sources into coherent stories. Doing so in an online setting allows scalable processing of massive news streams. To this end, we describe a novel method for clustering an incoming stream of multilingual documents into monolingual and crosslingual clusters. Unlike typical clustering approaches that report results on datasets with a small and known number of labels, we tackle the problem of discovering an ever growing number of cluster labels in an online fashion, using real news datasets in multiple languages. In our formulation, the monolingual clusters group together documents while the crosslingual clusters group together monolingual clusters, one per language that appears in the stream. Our method is simple to implement, computationally efficient and produces state-of-the-art results on datasets in German, English and Spanish.</abstract>
<identifier type="citekey">miranda-etal-2018-multilingual</identifier>
<identifier type="doi">10.18653/v1/D18-1483</identifier>
<location>
<url>https://aclanthology.org/D18-1483</url>
</location>
<part>
<date>2018-oct-nov</date>
<extent unit="page">
<start>4535</start>
<end>4544</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Multilingual Clustering of Streaming News
%A Miranda, Sebastião
%A Znotiņš, Artūrs
%A Cohen, Shay B.
%A Barzdins, Guntis
%Y Riloff, Ellen
%Y Chiang, David
%Y Hockenmaier, Julia
%Y Tsujii, Jun’ichi
%S Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing
%D 2018
%8 oct nov
%I Association for Computational Linguistics
%C Brussels, Belgium
%F miranda-etal-2018-multilingual
%X Clustering news across languages enables efficient media monitoring by aggregating articles from multilingual sources into coherent stories. Doing so in an online setting allows scalable processing of massive news streams. To this end, we describe a novel method for clustering an incoming stream of multilingual documents into monolingual and crosslingual clusters. Unlike typical clustering approaches that report results on datasets with a small and known number of labels, we tackle the problem of discovering an ever growing number of cluster labels in an online fashion, using real news datasets in multiple languages. In our formulation, the monolingual clusters group together documents while the crosslingual clusters group together monolingual clusters, one per language that appears in the stream. Our method is simple to implement, computationally efficient and produces state-of-the-art results on datasets in German, English and Spanish.
%R 10.18653/v1/D18-1483
%U https://aclanthology.org/D18-1483
%U https://doi.org/10.18653/v1/D18-1483
%P 4535-4544
Markdown (Informal)
[Multilingual Clustering of Streaming News](https://aclanthology.org/D18-1483) (Miranda et al., EMNLP 2018)
ACL
- Sebastião Miranda, Artūrs Znotiņš, Shay B. Cohen, and Guntis Barzdins. 2018. Multilingual Clustering of Streaming News. In Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, pages 4535–4544, Brussels, Belgium. Association for Computational Linguistics.