@inproceedings{stefanescu-etal-2014-latent,
title = "Latent Semantic Analysis Models on {W}ikipedia and {TASA}",
author = "{\textcommabelow{S}}tef{\u{a}}nescu, Dan and
Banjade, Rajendra and
Rus, Vasile",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Loftsson, Hrafn and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)",
month = may,
year = "2014",
address = "Reykjavik, Iceland",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2014/pdf/403_Paper.pdf",
pages = "1417--1422",
abstract = "This paper introduces a collection of freely available Latent Semantic Analysis models built on the entire English Wikipedia and the TASA corpus. The models differ not only on their source, Wikipedia versus TASA, but also on the linguistic items they focus on: all words, content-words, nouns-verbs, and main concepts. Generating such models from large datasets (e.g. Wikipedia), that can provide a large coverage for the actual vocabulary in use, is computationally challenging, which is the reason why large LSA models are rarely available. Our experiments show that for the task of word-to-word similarity, the scores assigned by these models are strongly correlated with human judgment, outperforming many other frequently used measures, and comparable to the state of the art.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="stefanescu-etal-2014-latent">
<titleInfo>
<title>Latent Semantic Analysis Models on Wikipedia and TASA</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">\textcommabelowStefănescu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rajendra</namePart>
<namePart type="family">Banjade</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vasile</namePart>
<namePart type="family">Rus</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2014-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hrafn</namePart>
<namePart type="family">Loftsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Reykjavik, Iceland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper introduces a collection of freely available Latent Semantic Analysis models built on the entire English Wikipedia and the TASA corpus. The models differ not only on their source, Wikipedia versus TASA, but also on the linguistic items they focus on: all words, content-words, nouns-verbs, and main concepts. Generating such models from large datasets (e.g. Wikipedia), that can provide a large coverage for the actual vocabulary in use, is computationally challenging, which is the reason why large LSA models are rarely available. Our experiments show that for the task of word-to-word similarity, the scores assigned by these models are strongly correlated with human judgment, outperforming many other frequently used measures, and comparable to the state of the art.</abstract>
<identifier type="citekey">stefanescu-etal-2014-latent</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/403_Paper.pdf</url>
</location>
<part>
<date>2014-05</date>
<extent unit="page">
<start>1417</start>
<end>1422</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Latent Semantic Analysis Models on Wikipedia and TASA
%A \textcommabelowStefănescu, Dan
%A Banjade, Rajendra
%A Rus, Vasile
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Loftsson, Hrafn
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)
%D 2014
%8 May
%I European Language Resources Association (ELRA)
%C Reykjavik, Iceland
%F stefanescu-etal-2014-latent
%X This paper introduces a collection of freely available Latent Semantic Analysis models built on the entire English Wikipedia and the TASA corpus. The models differ not only on their source, Wikipedia versus TASA, but also on the linguistic items they focus on: all words, content-words, nouns-verbs, and main concepts. Generating such models from large datasets (e.g. Wikipedia), that can provide a large coverage for the actual vocabulary in use, is computationally challenging, which is the reason why large LSA models are rarely available. Our experiments show that for the task of word-to-word similarity, the scores assigned by these models are strongly correlated with human judgment, outperforming many other frequently used measures, and comparable to the state of the art.
%U http://www.lrec-conf.org/proceedings/lrec2014/pdf/403_Paper.pdf
%P 1417-1422
Markdown (Informal)
[Latent Semantic Analysis Models on Wikipedia and TASA](http://www.lrec-conf.org/proceedings/lrec2014/pdf/403_Paper.pdf) (Ștefănescu et al., LREC 2014)
ACL
- Dan Ștefănescu, Rajendra Banjade, and Vasile Rus. 2014. Latent Semantic Analysis Models on Wikipedia and TASA. In Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14), pages 1417–1422, Reykjavik, Iceland. European Language Resources Association (ELRA).