@inproceedings{zhou-etal-2024-matching,
title = "Matching Varying-Length Texts via Topic-Informed and Decoupled Sentence Embeddings",
author = "Zhou, Xixi and
Gu, Chunbin and
Jie, Xin and
Bu, Jiajun and
Wang, Haishuai",
editor = "Duh, Kevin and
Gomez, Helena and
Bethard, Steven",
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2024",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-naacl.81",
doi = "10.18653/v1/2024.findings-naacl.81",
pages = "1274--1280",
abstract = "Measuring semantic similarity between texts is a crucial task in natural language processing. While existing semantic text matching focuses on pairs of similar-length sequences, matching texts with non-comparable lengths has broader applications in specific domains, such as comparing professional document summaries and content. Current approaches struggle with text pairs of non-comparable lengths due to truncation issues. To address this, we split texts into natural sentences and decouple sentence representations using supervised contrastive learning (SCL). Meanwhile, we adopt the embedded topic model (ETM) for specific domain data. Our experiments demonstrate the effectiveness of our model, based on decoupled and topic-informed sentence embeddings, in matching texts of significantly different lengths across three well-studied datasets.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhou-etal-2024-matching">
<titleInfo>
<title>Matching Varying-Length Texts via Topic-Informed and Decoupled Sentence Embeddings</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xixi</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chunbin</namePart>
<namePart type="family">Gu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xin</namePart>
<namePart type="family">Jie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Bu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haishuai</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: NAACL 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Duh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helena</namePart>
<namePart type="family">Gomez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Measuring semantic similarity between texts is a crucial task in natural language processing. While existing semantic text matching focuses on pairs of similar-length sequences, matching texts with non-comparable lengths has broader applications in specific domains, such as comparing professional document summaries and content. Current approaches struggle with text pairs of non-comparable lengths due to truncation issues. To address this, we split texts into natural sentences and decouple sentence representations using supervised contrastive learning (SCL). Meanwhile, we adopt the embedded topic model (ETM) for specific domain data. Our experiments demonstrate the effectiveness of our model, based on decoupled and topic-informed sentence embeddings, in matching texts of significantly different lengths across three well-studied datasets.</abstract>
<identifier type="citekey">zhou-etal-2024-matching</identifier>
<identifier type="doi">10.18653/v1/2024.findings-naacl.81</identifier>
<location>
<url>https://aclanthology.org/2024.findings-naacl.81</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>1274</start>
<end>1280</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Matching Varying-Length Texts via Topic-Informed and Decoupled Sentence Embeddings
%A Zhou, Xixi
%A Gu, Chunbin
%A Jie, Xin
%A Bu, Jiajun
%A Wang, Haishuai
%Y Duh, Kevin
%Y Gomez, Helena
%Y Bethard, Steven
%S Findings of the Association for Computational Linguistics: NAACL 2024
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F zhou-etal-2024-matching
%X Measuring semantic similarity between texts is a crucial task in natural language processing. While existing semantic text matching focuses on pairs of similar-length sequences, matching texts with non-comparable lengths has broader applications in specific domains, such as comparing professional document summaries and content. Current approaches struggle with text pairs of non-comparable lengths due to truncation issues. To address this, we split texts into natural sentences and decouple sentence representations using supervised contrastive learning (SCL). Meanwhile, we adopt the embedded topic model (ETM) for specific domain data. Our experiments demonstrate the effectiveness of our model, based on decoupled and topic-informed sentence embeddings, in matching texts of significantly different lengths across three well-studied datasets.
%R 10.18653/v1/2024.findings-naacl.81
%U https://aclanthology.org/2024.findings-naacl.81
%U https://doi.org/10.18653/v1/2024.findings-naacl.81
%P 1274-1280
Markdown (Informal)
[Matching Varying-Length Texts via Topic-Informed and Decoupled Sentence Embeddings](https://aclanthology.org/2024.findings-naacl.81) (Zhou et al., Findings 2024)
ACL