@inproceedings{erker-etal-2024-triple,
title = "Triple-Encoders: Representations That Fire Together, Wire Together",
author = "Erker, Justus-Jonas and
Mai, Florian and
Reimers, Nils and
Spanakis, Gerasimos and
Gurevych, Iryna",
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.acl-long.290",
doi = "10.18653/v1/2024.acl-long.290",
pages = "5317--5332",
abstract = "Search-based dialog models typically re-encode the dialog history at every turn, incurring high cost.Curved Contrastive Learning, a representation learning method that encodes relative distances between utterances into the embedding space via a bi-encoder, has recently shown promising results for dialog modeling at far superior efficiency.While high efficiency is achieved through independently encoding utterances, this ignores the importance of contextualization. To overcome this issue, this study introduces triple-encoders, which efficiently compute distributed utterance mixtures from these independently encoded utterances through a novel hebbian inspired co-occurrence learning objective in a self-organizing manner, without using any weights, i.e., merely through local interactions. Empirically, we find that triple-encoders lead to a substantial improvement over bi-encoders, and even to better zero-shot generalization than single-vector representation models without requiring re-encoding. Our code (https://github.com/UKPLab/acl2024-triple-encoders) and model (https://huggingface.co/UKPLab/triple-encoders-dailydialog) are publicly available.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="erker-etal-2024-triple">
    <titleInfo>
      <title>Triple-Encoders: Representations That Fire Together, Wire Together</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Justus-Jonas</namePart>
      <namePart type="family">Erker</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Florian</namePart>
      <namePart type="family">Mai</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nils</namePart>
      <namePart type="family">Reimers</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Gerasimos</namePart>
      <namePart type="family">Spanakis</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Iryna</namePart>
      <namePart type="family">Gurevych</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Lun-Wei</namePart>
        <namePart type="family">Ku</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Andre</namePart>
        <namePart type="family">Martins</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Vivek</namePart>
        <namePart type="family">Srikumar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Bangkok, Thailand</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Search-based dialog models typically re-encode the dialog history at every turn, incurring high cost. Curved Contrastive Learning, a representation learning method that encodes relative distances between utterances into the embedding space via a bi-encoder, has recently shown promising results for dialog modeling at far superior efficiency. While high efficiency is achieved through independently encoding utterances, this ignores the importance of contextualization. To overcome this issue, this study introduces triple-encoders, which efficiently compute distributed utterance mixtures from these independently encoded utterances through a novel Hebbian-inspired co-occurrence learning objective in a self-organizing manner, without using any weights, i.e., merely through local interactions. Empirically, we find that triple-encoders lead to a substantial improvement over bi-encoders, and even to better zero-shot generalization than single-vector representation models without requiring re-encoding. Our code (https://github.com/UKPLab/acl2024-triple-encoders) and model (https://huggingface.co/UKPLab/triple-encoders-dailydialog) are publicly available.</abstract>
<identifier type="citekey">erker-etal-2024-triple</identifier>
<identifier type="doi">10.18653/v1/2024.acl-long.290</identifier>
<location>
<url>https://aclanthology.org/2024.acl-long.290</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>5317</start>
<end>5332</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Triple-Encoders: Representations That Fire Together, Wire Together
%A Erker, Justus-Jonas
%A Mai, Florian
%A Reimers, Nils
%A Spanakis, Gerasimos
%A Gurevych, Iryna
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F erker-etal-2024-triple
%X Search-based dialog models typically re-encode the dialog history at every turn, incurring high cost. Curved Contrastive Learning, a representation learning method that encodes relative distances between utterances into the embedding space via a bi-encoder, has recently shown promising results for dialog modeling at far superior efficiency. While high efficiency is achieved through independently encoding utterances, this ignores the importance of contextualization. To overcome this issue, this study introduces triple-encoders, which efficiently compute distributed utterance mixtures from these independently encoded utterances through a novel Hebbian-inspired co-occurrence learning objective in a self-organizing manner, without using any weights, i.e., merely through local interactions. Empirically, we find that triple-encoders lead to a substantial improvement over bi-encoders, and even to better zero-shot generalization than single-vector representation models without requiring re-encoding. Our code (https://github.com/UKPLab/acl2024-triple-encoders) and model (https://huggingface.co/UKPLab/triple-encoders-dailydialog) are publicly available.
%R 10.18653/v1/2024.acl-long.290
%U https://aclanthology.org/2024.acl-long.290
%U https://doi.org/10.18653/v1/2024.acl-long.290
%P 5317-5332
Markdown (Informal)

[Triple-Encoders: Representations That Fire Together, Wire Together](https://aclanthology.org/2024.acl-long.290) (Erker et al., ACL 2024)

ACL

Justus-Jonas Erker, Florian Mai, Nils Reimers, Gerasimos Spanakis, and Iryna Gurevych. 2024. Triple-Encoders: Representations That Fire Together, Wire Together. In *Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)*, pages 5317–5332, Bangkok, Thailand. Association for Computational Linguistics.
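
The abstract points to a released checkpoint on the Hugging Face Hub. Below is a minimal sketch of how one might probe that embedding space, not the authors' documented interface (their `triple-encoders` package in the GitHub repository above is the supported entry point). It assumes the checkpoint loads as a standard Sentence Transformers model, and the mean-pooled context is a deliberate simplification standing in for the paper's distributed utterance mixtures.

```python
# Hypothetical usage sketch -- assumptions, not the authors' documented API.
# Assumes https://huggingface.co/UKPLab/triple-encoders-dailydialog loads as a
# plain Sentence Transformers model; the authors' package at
# https://github.com/UKPLab/acl2024-triple-encoders implements the actual
# triple-encoder mixing and should take precedence.
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("UKPLab/triple-encoders-dailydialog")

# Encode each utterance independently -- the efficiency property the abstract
# highlights: the dialog history is never re-encoded as a whole at a new turn.
history = model.encode(["Hi, how was your weekend?",
                        "Great, I finally went hiking again."])
candidates = model.encode(["Which trail did you take?",
                           "My printer is out of toner."])

# Naive stand-in for the paper's co-occurrence mixing: average the two
# independently encoded context utterances, then rank candidate replies
# by cosine similarity to that mixed context.
context = (history[0] + history[1]) / 2
print(util.cos_sim(context, candidates))  # higher score = more plausible reply
```

The averaging step only gestures at the idea of combining independently encoded utterances through local interactions; consult the repository README for the real contextualization procedure.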