@inproceedings{shrestha-2011-corpus,
title = "Corpus-Based methods for Short Text Similarity",
author = "Shrestha, Prajol",
editor = "Lopez, C{\'e}dric",
booktitle = "Actes de la 18e conf{\'e}rence sur le Traitement Automatique des Langues Naturelles. REncontres jeunes Chercheurs en Informatique pour le Traitement Automatique des Langues (articles courts)",
month = jun,
year = "2011",
address = "Montpellier, France",
publisher = "ATALA",
url = "https://aclanthology.org/2011.jeptalnrecital-recitalcourt.1",
pages = "1--6",
abstract = "This paper presents corpus-based methods to find similarity between short text (sentences, paragraphs, ...) which has many applications in the field of NLP. Previous works on this problem have been based on supervised methods or have used external resources such as WordNet, British National Corpus etc. Our methods are focused on unsupervised corpus-based methods. We present a new method, based on Vector Space Model, to capture the contextual behavior, senses and correlation, of terms and show that this method performs better than the baseline method that uses vector based cosine similarity measure. The performance of existing document similarity measures, Dice and Resemblance, are also evaluated which in our knowledge have not been used for short text similarity. We also show that the performance of the vector-based baseline method is improved when using stems instead of words and using the candidate sentences for computing the parameters rather than some external resource.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="shrestha-2011-corpus">
<titleInfo>
<title>Corpus-Based methods for Short Text Similarity</title>
</titleInfo>
<name type="personal">
<namePart type="given">Prajol</namePart>
<namePart type="family">Shrestha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2011-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Actes de la 18e conférence sur le Traitement Automatique des Langues Naturelles. REncontres jeunes Chercheurs en Informatique pour le Traitement Automatique des Langues (articles courts)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Cédric</namePart>
<namePart type="family">Lopez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ATALA</publisher>
<place>
<placeTerm type="text">Montpellier, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents corpus-based methods to find similarity between short text (sentences, paragraphs, ...) which has many applications in the field of NLP. Previous works on this problem have been based on supervised methods or have used external resources such as WordNet, British National Corpus etc. Our methods are focused on unsupervised corpus-based methods. We present a new method, based on Vector Space Model, to capture the contextual behavior, senses and correlation, of terms and show that this method performs better than the baseline method that uses vector based cosine similarity measure. The performance of existing document similarity measures, Dice and Resemblance, are also evaluated which in our knowledge have not been used for short text similarity. We also show that the performance of the vector-based baseline method is improved when using stems instead of words and using the candidate sentences for computing the parameters rather than some external resource.</abstract>
<identifier type="citekey">shrestha-2011-corpus</identifier>
<location>
<url>https://aclanthology.org/2011.jeptalnrecital-recitalcourt.1</url>
</location>
<part>
<date>2011-06</date>
<extent unit="page">
<start>1</start>
<end>6</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Corpus-Based methods for Short Text Similarity
%A Shrestha, Prajol
%Y Lopez, Cédric
%S Actes de la 18e conférence sur le Traitement Automatique des Langues Naturelles. REncontres jeunes Chercheurs en Informatique pour le Traitement Automatique des Langues (articles courts)
%D 2011
%8 June
%I ATALA
%C Montpellier, France
%F shrestha-2011-corpus
%X This paper presents corpus-based methods to find similarity between short text (sentences, paragraphs, ...) which has many applications in the field of NLP. Previous works on this problem have been based on supervised methods or have used external resources such as WordNet, British National Corpus etc. Our methods are focused on unsupervised corpus-based methods. We present a new method, based on Vector Space Model, to capture the contextual behavior, senses and correlation, of terms and show that this method performs better than the baseline method that uses vector based cosine similarity measure. The performance of existing document similarity measures, Dice and Resemblance, are also evaluated which in our knowledge have not been used for short text similarity. We also show that the performance of the vector-based baseline method is improved when using stems instead of words and using the candidate sentences for computing the parameters rather than some external resource.
%U https://aclanthology.org/2011.jeptalnrecital-recitalcourt.1
%P 1-6
Markdown (Informal)
[Corpus-Based methods for Short Text Similarity](https://aclanthology.org/2011.jeptalnrecital-recitalcourt.1) (Shrestha, JEP/TALN/RECITAL 2011)
ACL
- Prajol Shrestha. 2011. Corpus-Based methods for Short Text Similarity. In Actes de la 18e conférence sur le Traitement Automatique des Langues Naturelles. REncontres jeunes Chercheurs en Informatique pour le Traitement Automatique des Langues (articles courts), pages 1–6, Montpellier, France. ATALA.