% Journal article: Wieting, Bansal, Gimpel, and Livescu, TACL vol. 3 (2015).
% Names are stored in "Last, First" form; the proper noun in the title is
% brace-protected so sentence-casing styles keep its capitals.
% NOTE(review): the url points at the journal's OJS host as exported; confirm
% it still resolves, and add a doi field if one is available.
@article{TACL571,
  author   = {Wieting, John and Bansal, Mohit and Gimpel, Kevin and Livescu, Karen},
  title    = {From {Paraphrase} {Database} to Compositional Paraphrase Model and Back},
  journal  = {Transactions of the Association for Computational Linguistics},
  volume   = {3},
  pages    = {345--358},
  year     = {2015},
  issn     = {2307-387X},
  url      = {https://tacl2013.cs.columbia.edu/ojs/index.php/tacl/article/view/571},
  abstract = {The Paraphrase Database (PPDB; Ganitkevitch et al., 2013) is an
              extensive semantic resource, consisting of a list of phrase pairs
              with (heuristic) confidence estimates. However, it is still
              unclear how it can best be used, due to the heuristic nature of
              the confidences and its necessarily incomplete coverage. We
              propose models to leverage the phrase pairs from the PPDB to build
              parametric paraphrase models that score paraphrase pairs more
              accurately than the PPDB's internal scores while simultaneously
              improving its coverage. They allow for learning phrase embeddings
              as well as improved word embeddings. Moreover, we introduce two
              new, manually annotated datasets to evaluate short-phrase
              paraphrasing models. Using our paraphrase model trained using
              PPDB, we achieve state-of-the-art results on standard word and
              bigram similarity tasks and beat strong baselines on our new short
              phrase paraphrase tasks.},
}
