@inproceedings{bennani-smires-etal-2018-simple,
title = "Simple Unsupervised Keyphrase Extraction using Sentence Embeddings",
author = "Bennani-Smires, Kamil and
Musat, Claudiu and
Hossmann, Andreea and
Baeriswyl, Michael and
Jaggi, Martin",
editor = "Korhonen, Anna and
Titov, Ivan",
booktitle = "Proceedings of the 22nd Conference on Computational Natural Language Learning",
month = oct,
year = "2018",
address = "Brussels, Belgium",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/K18-1022/",
doi = "10.18653/v1/K18-1022",
pages = "221--229",
abstract = "Keyphrase extraction is the task of automatically selecting a small set of phrases that best describe a given free text document. Supervised keyphrase extraction requires large amounts of labeled training data and generalizes very poorly outside the domain of the training data. At the same time, unsupervised systems have poor accuracy, and often do not generalize well, as they require the input document to belong to a larger corpus also given as input. Addressing these drawbacks, in this paper, we tackle keyphrase extraction from single documents with EmbedRank: a novel unsupervised method, that leverages sentence embeddings. EmbedRank achieves higher F-scores than graph-based state of the art systems on standard datasets and is suitable for real-time processing of large amounts of Web data. With EmbedRank, we also explicitly increase coverage and diversity among the selected keyphrases by introducing an embedding-based maximal marginal relevance (MMR) for new phrases. A user study including over 200 votes showed that, although reducing the phrases' semantic overlap leads to no gains in F-score, our high diversity selection is preferred by humans."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bennani-smires-etal-2018-simple">
<titleInfo>
<title>Simple Unsupervised Keyphrase Extraction using Sentence Embeddings</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kamil</namePart>
<namePart type="family">Bennani-Smires</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claudiu</namePart>
<namePart type="family">Musat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreea</namePart>
<namePart type="family">Hossmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Baeriswyl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Martin</namePart>
<namePart type="family">Jaggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 22nd Conference on Computational Natural Language Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Korhonen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="family">Titov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Brussels, Belgium</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Keyphrase extraction is the task of automatically selecting a small set of phrases that best describe a given free text document. Supervised keyphrase extraction requires large amounts of labeled training data and generalizes very poorly outside the domain of the training data. At the same time, unsupervised systems have poor accuracy, and often do not generalize well, as they require the input document to belong to a larger corpus also given as input. Addressing these drawbacks, in this paper, we tackle keyphrase extraction from single documents with EmbedRank: a novel unsupervised method, that leverages sentence embeddings. EmbedRank achieves higher F-scores than graph-based state of the art systems on standard datasets and is suitable for real-time processing of large amounts of Web data. With EmbedRank, we also explicitly increase coverage and diversity among the selected keyphrases by introducing an embedding-based maximal marginal relevance (MMR) for new phrases. A user study including over 200 votes showed that, although reducing the phrases’ semantic overlap leads to no gains in F-score, our high diversity selection is preferred by humans.</abstract>
<identifier type="citekey">bennani-smires-etal-2018-simple</identifier>
<identifier type="doi">10.18653/v1/K18-1022</identifier>
<location>
<url>https://aclanthology.org/K18-1022/</url>
</location>
<part>
<date>2018-10</date>
<extent unit="page">
<start>221</start>
<end>229</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Simple Unsupervised Keyphrase Extraction using Sentence Embeddings
%A Bennani-Smires, Kamil
%A Musat, Claudiu
%A Hossmann, Andreea
%A Baeriswyl, Michael
%A Jaggi, Martin
%Y Korhonen, Anna
%Y Titov, Ivan
%S Proceedings of the 22nd Conference on Computational Natural Language Learning
%D 2018
%8 October
%I Association for Computational Linguistics
%C Brussels, Belgium
%F bennani-smires-etal-2018-simple
%X Keyphrase extraction is the task of automatically selecting a small set of phrases that best describe a given free text document. Supervised keyphrase extraction requires large amounts of labeled training data and generalizes very poorly outside the domain of the training data. At the same time, unsupervised systems have poor accuracy, and often do not generalize well, as they require the input document to belong to a larger corpus also given as input. Addressing these drawbacks, in this paper, we tackle keyphrase extraction from single documents with EmbedRank: a novel unsupervised method, that leverages sentence embeddings. EmbedRank achieves higher F-scores than graph-based state of the art systems on standard datasets and is suitable for real-time processing of large amounts of Web data. With EmbedRank, we also explicitly increase coverage and diversity among the selected keyphrases by introducing an embedding-based maximal marginal relevance (MMR) for new phrases. A user study including over 200 votes showed that, although reducing the phrases’ semantic overlap leads to no gains in F-score, our high diversity selection is preferred by humans.
%R 10.18653/v1/K18-1022
%U https://aclanthology.org/K18-1022/
%U https://doi.org/10.18653/v1/K18-1022
%P 221-229
Markdown (Informal)
[Simple Unsupervised Keyphrase Extraction using Sentence Embeddings](https://aclanthology.org/K18-1022/) (Bennani-Smires et al., CoNLL 2018)
ACL