@inproceedings{pozar-etal-2022-cuni,
title = "{CUNI} Submission to the {BUCC} 2022 Shared Task on Bilingual Term Alignment",
author = "Po{\v{z}}{\'a}r, Borek and
Tauchmanov{\'a}, Kl{\'a}ra and
Neumannov{\'a}, Krist{\'y}na and
Kvapil{\'i}kov{\'a}, Ivana and
Bojar, Ond{\v{r}}ej",
editor = "Rapp, Reinhard and
Zweigenbaum, Pierre and
Sharoff, Serge",
booktitle = "Proceedings of the BUCC Workshop within LREC 2022",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.bucc-1.6/",
pages = "43--49",
abstract = "We present our submission to the BUCC Shared Task on bilingual term alignment in comparable specialized corpora. We devised three approaches using static embeddings with post-hoc alignment, the Monoses pipeline for unsupervised phrase-based machine translation, and contextualized multilingual embeddings. We show that contextualized embeddings from pretrained multilingual models lead to similar results as static embeddings but further improvement can be achieved by task-specific fine-tuning. Retrieving term pairs from the running phrase tables of the Monoses systems can match this enhanced performance and leads to an average precision of 0.88 on the train set."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pozar-etal-2022-cuni">
<titleInfo>
<title>CUNI Submission to the BUCC 2022 Shared Task on Bilingual Term Alignment</title>
</titleInfo>
<name type="personal">
<namePart type="given">Borek</namePart>
<namePart type="family">Požár</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Klára</namePart>
<namePart type="family">Tauchmanová</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kristýna</namePart>
<namePart type="family">Neumannová</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivana</namePart>
<namePart type="family">Kvapilíková</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ondřej</namePart>
<namePart type="family">Bojar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the BUCC Workshop within LREC 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Reinhard</namePart>
<namePart type="family">Rapp</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pierre</namePart>
<namePart type="family">Zweigenbaum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Serge</namePart>
<namePart type="family">Sharoff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present our submission to the BUCC Shared Task on bilingual term alignment in comparable specialized corpora. We devised three approaches using static embeddings with post-hoc alignment, the Monoses pipeline for unsupervised phrase-based machine translation, and contextualized multilingual embeddings. We show that contextualized embeddings from pretrained multilingual models lead to similar results as static embeddings but further improvement can be achieved by task-specific fine-tuning. Retrieving term pairs from the running phrase tables of the Monoses systems can match this enhanced performance and leads to an average precision of 0.88 on the train set.</abstract>
<identifier type="citekey">pozar-etal-2022-cuni</identifier>
<location>
<url>https://aclanthology.org/2022.bucc-1.6/</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>43</start>
<end>49</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CUNI Submission to the BUCC 2022 Shared Task on Bilingual Term Alignment
%A Požár, Borek
%A Tauchmanová, Klára
%A Neumannová, Kristýna
%A Kvapilíková, Ivana
%A Bojar, Ondřej
%Y Rapp, Reinhard
%Y Zweigenbaum, Pierre
%Y Sharoff, Serge
%S Proceedings of the BUCC Workshop within LREC 2022
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F pozar-etal-2022-cuni
%X We present our submission to the BUCC Shared Task on bilingual term alignment in comparable specialized corpora. We devised three approaches using static embeddings with post-hoc alignment, the Monoses pipeline for unsupervised phrase-based machine translation, and contextualized multilingual embeddings. We show that contextualized embeddings from pretrained multilingual models lead to similar results as static embeddings but further improvement can be achieved by task-specific fine-tuning. Retrieving term pairs from the running phrase tables of the Monoses systems can match this enhanced performance and leads to an average precision of 0.88 on the train set.
%U https://aclanthology.org/2022.bucc-1.6/
%P 43-49
Markdown (Informal)
[CUNI Submission to the BUCC 2022 Shared Task on Bilingual Term Alignment](https://aclanthology.org/2022.bucc-1.6/) (Požár et al., BUCC 2022)
ACL