@inproceedings{estevanell-valladares-etal-2026-smartmatch,
title = "{S}mart{M}atch: Real-Time Semantic Retrieval for Translation Memory Systems",
author = "Estevanell Valladares, Ernesto Luis and
Lamsiyah, Salima and
Picazo-Izquierdo, Alicia and
Ranasinghe, Tharindu and
Mitkov, Ruslan and
Munoz, Rafael",
editor = "Croce, Danilo and
Leidner, Jochen and
Moosavi, Nafise Sadat",
booktitle = "Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 3: System Demonstrations)",
month = mar,
year = "2026",
address = "Rabat, Marocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.eacl-demo.36/",
pages = "527--536",
ISBN = "979-8-89176-382-1",
abstract = "Translation Memory (TM) systems are core components of commercial computer-aided translation (CAT) tools. However, traditional fuzzy matching methods often fail to retrieve semantically relevant content when surface similarity is low. We introduce SmartMatch, an open-source interactive demo and evaluation toolkit for TM retrieval that connects modern sentence encoders (including LLM-derived representations) and strong lexical/fuzzy baselines with a vector database, and exposes the end-to-end retrieval pipeline through a web-based UI for qualitative inspection and preference logging. The demo allows users to (i) enter a query segment, (ii) switch retrieval backends and embedding models, (iii) inspect top-$k$ retrieved matches with similarity scores and qualitative cues, and (iv) observe end-to-end latency in real time. We provide a reproducible benchmark on multilingual TM data, reporting retrieval quality using reference-based MT metrics (COMET, BERTScore, METEOR, chrF) together with coverage and latency/throughput trade-offs relevant to real-time CAT workflows. On DGT-TM, encoder-based retrieval achieves full coverage (100{\%}) with millisecond-level latency (p50/p90 $\leq$ 6{--}20 ms) and attains the strongest semantic-quality scores on the shared query set (e.g., BERTScore up to 0.91 at $k{=}10$), while BM25 remains a strong lightweight lexical baseline with very low latency. SmartMatch targets CAT researchers and tool builders and bridges recent advances in sentence encoders with the real-time constraints of translation memory retrieval."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="estevanell-valladares-etal-2026-smartmatch">
<titleInfo>
<title>SmartMatch: Real-Time Semantic Retrieval for Translation Memory Systems</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ernesto</namePart>
<namePart type="given">Luis</namePart>
<namePart type="family">Estevanell Valladares</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Salima</namePart>
<namePart type="family">Lamsiyah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alicia</namePart>
<namePart type="family">Picazo-Izquierdo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tharindu</namePart>
<namePart type="family">Ranasinghe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rafael</namePart>
<namePart type="family">Munoz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 3: System Demonstrations)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Danilo</namePart>
<namePart type="family">Croce</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jochen</namePart>
<namePart type="family">Leidner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nafise</namePart>
<namePart type="given">Sadat</namePart>
<namePart type="family">Moosavi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Marocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-382-1</identifier>
</relatedItem>
<abstract>Translation Memory (TM) systems are core components of commercial computer-aided translation (CAT) tools. However, traditional fuzzy matching methods often fail to retrieve semantically relevant content when surface similarity is low. We introduce SmartMatch, an open-source interactive demo and evaluation toolkit for TM retrieval that connects modern sentence encoders (including LLM-derived representations) and strong lexical/fuzzy baselines with a vector database, and exposes the end-to-end retrieval pipeline through a web-based UI for qualitative inspection and preference logging. The demo allows users to (i) enter a query segment, (ii) switch retrieval backends and embedding models, (iii) inspect top-k retrieved matches with similarity scores and qualitative cues, and (iv) observe end-to-end latency in real time. We provide a reproducible benchmark on multilingual TM data, reporting retrieval quality using reference-based MT metrics (COMET, BERTScore, METEOR, chrF) together with coverage and latency/throughput trade-offs relevant to real-time CAT workflows. On DGT-TM, encoder-based retrieval achieves full coverage (100%) with millisecond-level latency (p50/p90 łeq 6–20 ms) and attains the strongest semantic-quality scores on the shared query set (e.g., BERTScore up to 0.91 at k=10), while BM25 remains a strong lightweight lexical baseline with very low latency. SmartMatch targets CAT researchers and tool builders and bridges recent advances in sentence encoders with the real-time constraints of translation memory retrieval.</abstract>
<identifier type="citekey">estevanell-valladares-etal-2026-smartmatch</identifier>
<location>
<url>https://aclanthology.org/2026.eacl-demo.36/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>527</start>
<end>536</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SmartMatch: Real-Time Semantic Retrieval for Translation Memory Systems
%A Estevanell Valladares, Ernesto Luis
%A Lamsiyah, Salima
%A Picazo-Izquierdo, Alicia
%A Ranasinghe, Tharindu
%A Mitkov, Ruslan
%A Munoz, Rafael
%Y Croce, Danilo
%Y Leidner, Jochen
%Y Moosavi, Nafise Sadat
%S Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 3: System Demonstrations)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Marocco
%@ 979-8-89176-382-1
%F estevanell-valladares-etal-2026-smartmatch
%X Translation Memory (TM) systems are core components of commercial computer-aided translation (CAT) tools. However, traditional fuzzy matching methods often fail to retrieve semantically relevant content when surface similarity is low. We introduce SmartMatch, an open-source interactive demo and evaluation toolkit for TM retrieval that connects modern sentence encoders (including LLM-derived representations) and strong lexical/fuzzy baselines with a vector database, and exposes the end-to-end retrieval pipeline through a web-based UI for qualitative inspection and preference logging. The demo allows users to (i) enter a query segment, (ii) switch retrieval backends and embedding models, (iii) inspect top-k retrieved matches with similarity scores and qualitative cues, and (iv) observe end-to-end latency in real time. We provide a reproducible benchmark on multilingual TM data, reporting retrieval quality using reference-based MT metrics (COMET, BERTScore, METEOR, chrF) together with coverage and latency/throughput trade-offs relevant to real-time CAT workflows. On DGT-TM, encoder-based retrieval achieves full coverage (100%) with millisecond-level latency (p50/p90 łeq 6–20 ms) and attains the strongest semantic-quality scores on the shared query set (e.g., BERTScore up to 0.91 at k=10), while BM25 remains a strong lightweight lexical baseline with very low latency. SmartMatch targets CAT researchers and tool builders and bridges recent advances in sentence encoders with the real-time constraints of translation memory retrieval.
%U https://aclanthology.org/2026.eacl-demo.36/
%P 527-536
Markdown (Informal)
[SmartMatch: Real-Time Semantic Retrieval for Translation Memory Systems](https://aclanthology.org/2026.eacl-demo.36/) (Estevanell Valladares et al., EACL 2026)
ACL
- Ernesto Luis Estevanell Valladares, Salima Lamsiyah, Alicia Picazo-Izquierdo, Tharindu Ranasinghe, Ruslan Mitkov, and Rafael Munoz. 2026. SmartMatch: Real-Time Semantic Retrieval for Translation Memory Systems. In Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 3: System Demonstrations), pages 527–536, Rabat, Marocco. Association for Computational Linguistics.