@article{xu-etal-2014-extracting,
    title = "Extracting Lexically Divergent Paraphrases from {T}witter",
    author = "Xu, Wei  and
      Ritter, Alan  and
      Callison-Burch, Chris  and
      Dolan, William B.  and
      Ji, Yangfeng",
    editor = "Lin, Dekang  and
      Collins, Michael  and
      Lee, Lillian",
    journal = "Transactions of the Association for Computational Linguistics",
    volume = "2",
    year = "2014",
    address = "Cambridge, MA",
    publisher = "MIT Press",
    url = "https://aclanthology.org/Q14-1034/",
    doi = "10.1162/tacl_a_00194",
    pages = "435--448",
    abstract = "We present MultiP (Multi-instance Learning Paraphrase Model), a new model suited to identify paraphrases within the short messages on Twitter. We jointly model paraphrase relations between word and sentence pairs and assume only sentence-level annotations during learning. Using this principled latent variable model alone, we achieve the performance competitive with a state-of-the-art method which combines a latent space model with a feature-based supervised classifier. Our model also captures lexically divergent paraphrases that differ from yet complement previous methods; combining our model with previous work significantly outperforms the state-of-the-art. In addition, we present a novel annotation methodology that has allowed us to crowdsource a paraphrase corpus from Twitter. We make this new dataset available to the research community."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="xu-etal-2014-extracting">
    <titleInfo>
        <title>Extracting Lexically Divergent Paraphrases from Twitter</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Wei</namePart>
        <namePart type="family">Xu</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Alan</namePart>
        <namePart type="family">Ritter</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Chris</namePart>
        <namePart type="family">Callison-Burch</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">William</namePart>
        <namePart type="given">B</namePart>
        <namePart type="family">Dolan</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Yangfeng</namePart>
        <namePart type="family">Ji</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2014</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <genre authority="bibutilsgt">journal article</genre>
    <relatedItem type="host">
        <titleInfo>
            <title>Transactions of the Association for Computational Linguistics</title>
        </titleInfo>
        <originInfo>
            <issuance>continuing</issuance>
            <publisher>MIT Press</publisher>
            <place>
                <placeTerm type="text">Cambridge, MA</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">periodical</genre>
        <genre authority="bibutilsgt">academic journal</genre>
    </relatedItem>
    <abstract>We present MultiP (Multi-instance Learning Paraphrase Model), a new model suited to identify paraphrases within the short messages on Twitter. We jointly model paraphrase relations between word and sentence pairs and assume only sentence-level annotations during learning. Using this principled latent variable model alone, we achieve the performance competitive with a state-of-the-art method which combines a latent space model with a feature-based supervised classifier. Our model also captures lexically divergent paraphrases that differ from yet complement previous methods; combining our model with previous work significantly outperforms the state-of-the-art. In addition, we present a novel annotation methodology that has allowed us to crowdsource a paraphrase corpus from Twitter. We make this new dataset available to the research community.</abstract>
    <identifier type="citekey">xu-etal-2014-extracting</identifier>
    <identifier type="doi">10.1162/tacl_a_00194</identifier>
    <location>
        <url>https://aclanthology.org/Q14-1034/</url>
    </location>
    <part>
        <date>2014</date>
        <detail type="volume"><number>2</number></detail>
        <extent unit="page">
            <start>435</start>
            <end>448</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Journal Article
%T Extracting Lexically Divergent Paraphrases from Twitter
%A Xu, Wei
%A Ritter, Alan
%A Callison-Burch, Chris
%A Dolan, William B.
%A Ji, Yangfeng
%J Transactions of the Association for Computational Linguistics
%D 2014
%V 2
%I MIT Press
%C Cambridge, MA
%F xu-etal-2014-extracting
%X We present MultiP (Multi-instance Learning Paraphrase Model), a new model suited to identify paraphrases within the short messages on Twitter. We jointly model paraphrase relations between word and sentence pairs and assume only sentence-level annotations during learning. Using this principled latent variable model alone, we achieve the performance competitive with a state-of-the-art method which combines a latent space model with a feature-based supervised classifier. Our model also captures lexically divergent paraphrases that differ from yet complement previous methods; combining our model with previous work significantly outperforms the state-of-the-art. In addition, we present a novel annotation methodology that has allowed us to crowdsource a paraphrase corpus from Twitter. We make this new dataset available to the research community.
%R 10.1162/tacl_a_00194
%U https://aclanthology.org/Q14-1034/
%U https://doi.org/10.1162/tacl_a_00194
%P 435-448
Markdown (Informal)
[Extracting Lexically Divergent Paraphrases from Twitter](https://aclanthology.org/Q14-1034/) (Xu et al., TACL 2014)
ACL