@inproceedings{yu-etal-2022-optimal,
title = "Optimal Partial Transport Based Sentence Selection for Long-form Document Matching",
author = "Yu, Weijie and
Pang, Liang and
Xu, Jun and
Su, Bing and
Dong, Zhenhua and
Wen, Ji-Rong",
editor = "Calzolari, Nicoletta and
Huang, Chu-Ren and
Kim, Hansaem and
Pustejovsky, James and
Wanner, Leo and
Choi, Key-Sun and
Ryu, Pum-Mo and
Chen, Hsin-Hsi and
Donatelli, Lucia and
Ji, Heng and
Kurohashi, Sadao and
Paggio, Patrizia and
Xue, Nianwen and
Kim, Seokhwan and
Hahm, Younggyun and
He, Zhong and
Lee, Tony Kyungil and
Santus, Enrico and
Bond, Francis and
Na, Seung-Hoon",
booktitle = "Proceedings of the 29th International Conference on Computational Linguistics",
month = oct,
year = "2022",
address = "Gyeongju, Republic of Korea",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2022.coling-1.208",
pages = "2363--2373",
abstract = "One typical approach to long-form document matching is first conducting alignment between cross-document sentence pairs, and then aggregating all of the sentence-level matching signals. However, this approach could be problematic because the alignment between documents is partial {---} despite two documents as a whole are well-matched, most of the sentences could still be dissimilar. Those dissimilar sentences lead to spurious sentence-level matching signals which may overwhelm the real ones, increasing the difficulties of learning the matching function. Therefore, accurately selecting the key sentences for document matching is becoming a challenging issue. To address the issue, we propose a novel matching approach that equips existing document matching models with an Optimal Partial Transport (OPT) based component, namely OPT-Match, which selects the sentences that play a major role in matching. Enjoying the partial transport properties of OPT, the selected key sentences can not only effectively enhance the matching accuracy, but also be explained as the rationales for the matching results. Extensive experiments on four publicly available datasets demonstrated that existing methods equipped with OPT-Match consistently outperformed the corresponding underlying methods. Evaluations also showed that the key sentences selected by OPT-Match were consistent with human-provided rationales.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yu-etal-2022-optimal">
<titleInfo>
<title>Optimal Partial Transport Based Sentence Selection for Long-form Document Matching</title>
</titleInfo>
<name type="personal">
<namePart type="given">Weijie</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liang</namePart>
<namePart type="family">Pang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jun</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bing</namePart>
<namePart type="family">Su</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhenhua</namePart>
<namePart type="family">Dong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ji-Rong</namePart>
<namePart type="family">Wen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 29th International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chu-Ren</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hansaem</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Pustejovsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Key-Sun</namePart>
<namePart type="family">Choi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pum-Mo</namePart>
<namePart type="family">Ryu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hsin-Hsi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucia</namePart>
<namePart type="family">Donatelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sadao</namePart>
<namePart type="family">Kurohashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Patrizia</namePart>
<namePart type="family">Paggio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seokhwan</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Younggyun</namePart>
<namePart type="family">Hahm</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhong</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tony</namePart>
<namePart type="given">Kyungil</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Enrico</namePart>
<namePart type="family">Santus</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francis</namePart>
<namePart type="family">Bond</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seung-Hoon</namePart>
<namePart type="family">Na</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gyeongju, Republic of Korea</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>One typical approach to long-form document matching is first conducting alignment between cross-document sentence pairs, and then aggregating all of the sentence-level matching signals. However, this approach could be problematic because the alignment between documents is partial — despite two documents as a whole are well-matched, most of the sentences could still be dissimilar. Those dissimilar sentences lead to spurious sentence-level matching signals which may overwhelm the real ones, increasing the difficulties of learning the matching function. Therefore, accurately selecting the key sentences for document matching is becoming a challenging issue. To address the issue, we propose a novel matching approach that equips existing document matching models with an Optimal Partial Transport (OPT) based component, namely OPT-Match, which selects the sentences that play a major role in matching. Enjoying the partial transport properties of OPT, the selected key sentences can not only effectively enhance the matching accuracy, but also be explained as the rationales for the matching results. Extensive experiments on four publicly available datasets demonstrated that existing methods equipped with OPT-Match consistently outperformed the corresponding underlying methods. Evaluations also showed that the key sentences selected by OPT-Match were consistent with human-provided rationales.</abstract>
<identifier type="citekey">yu-etal-2022-optimal</identifier>
<location>
<url>https://aclanthology.org/2022.coling-1.208</url>
</location>
<part>
<date>2022-10</date>
<extent unit="page">
<start>2363</start>
<end>2373</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Optimal Partial Transport Based Sentence Selection for Long-form Document Matching
%A Yu, Weijie
%A Pang, Liang
%A Xu, Jun
%A Su, Bing
%A Dong, Zhenhua
%A Wen, Ji-Rong
%Y Calzolari, Nicoletta
%Y Huang, Chu-Ren
%Y Kim, Hansaem
%Y Pustejovsky, James
%Y Wanner, Leo
%Y Choi, Key-Sun
%Y Ryu, Pum-Mo
%Y Chen, Hsin-Hsi
%Y Donatelli, Lucia
%Y Ji, Heng
%Y Kurohashi, Sadao
%Y Paggio, Patrizia
%Y Xue, Nianwen
%Y Kim, Seokhwan
%Y Hahm, Younggyun
%Y He, Zhong
%Y Lee, Tony Kyungil
%Y Santus, Enrico
%Y Bond, Francis
%Y Na, Seung-Hoon
%S Proceedings of the 29th International Conference on Computational Linguistics
%D 2022
%8 October
%I International Committee on Computational Linguistics
%C Gyeongju, Republic of Korea
%F yu-etal-2022-optimal
%X One typical approach to long-form document matching is first conducting alignment between cross-document sentence pairs, and then aggregating all of the sentence-level matching signals. However, this approach could be problematic because the alignment between documents is partial — despite two documents as a whole are well-matched, most of the sentences could still be dissimilar. Those dissimilar sentences lead to spurious sentence-level matching signals which may overwhelm the real ones, increasing the difficulties of learning the matching function. Therefore, accurately selecting the key sentences for document matching is becoming a challenging issue. To address the issue, we propose a novel matching approach that equips existing document matching models with an Optimal Partial Transport (OPT) based component, namely OPT-Match, which selects the sentences that play a major role in matching. Enjoying the partial transport properties of OPT, the selected key sentences can not only effectively enhance the matching accuracy, but also be explained as the rationales for the matching results. Extensive experiments on four publicly available datasets demonstrated that existing methods equipped with OPT-Match consistently outperformed the corresponding underlying methods. Evaluations also showed that the key sentences selected by OPT-Match were consistent with human-provided rationales.
%U https://aclanthology.org/2022.coling-1.208
%P 2363-2373
Markdown (Informal)
[Optimal Partial Transport Based Sentence Selection for Long-form Document Matching](https://aclanthology.org/2022.coling-1.208) (Yu et al., COLING 2022)
ACL