@inproceedings{dinh-etal-2022-utilizing,
title = "Utilizing Language-Image Pretraining for Efficient and Robust Bilingual Word Alignment",
author = "Dinh, Tuan and
Sohn, Jy-yong and
Rajput, Shashank and
Ossowski, Timothy and
Ming, Yifei and
Hu, Junjie and
Papailiopoulos, Dimitris and
Lee, Kangwook",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2022",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-emnlp.12",
doi = "10.18653/v1/2022.findings-emnlp.12",
pages = "154--168",
abstract = "Word translation without parallel corpora has become feasible, rivaling the performance of supervised methods. Recent findings have shown the improvement in accuracy and robustness of unsupervised word translation (UWT) by utilizing visual observations, which are universal representations across languages. Our work investigates the potential of using not only visual observations but also pretrained language-image models for enabling a more efficient and robust UWT. We develop a novel UWT method dubbed Word Alignment using Language-Image Pretraining (WALIP), leveraging visual observations via the shared image-text embedding space of CLIPs (Radford et al., 2021). WALIP has a two-step procedure. First, we retrieve word pairs with high confidences of similarity, computed using our proposed image-based fingerprints, which define the initial pivot for the alignment. Second, we apply our robust Procrustes algorithm to estimate the linear mapping between two embedding spaces, which iteratively corrects and refines the estimated alignment. Our extensive experiments show that WALIP improves upon the state-of-the-art performance of bilingual word alignment for a few language pairs across different word embeddings and displays great robustness to the dissimilarity of language pairs or training corpora for two word embeddings.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dinh-etal-2022-utilizing">
<titleInfo>
<title>Utilizing Language-Image Pretraining for Efficient and Robust Bilingual Word Alignment</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tuan</namePart>
<namePart type="family">Dinh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jy-yong</namePart>
<namePart type="family">Sohn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shashank</namePart>
<namePart type="family">Rajput</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Timothy</namePart>
<namePart type="family">Ossowski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yifei</namePart>
<namePart type="family">Ming</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junjie</namePart>
<namePart type="family">Hu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dimitris</namePart>
<namePart type="family">Papailiopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kangwook</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Word translation without parallel corpora has become feasible, rivaling the performance of supervised methods. Recent findings have shown the improvement in accuracy and robustness of unsupervised word translation (UWT) by utilizing visual observations, which are universal representations across languages. Our work investigates the potential of using not only visual observations but also pretrained language-image models for enabling a more efficient and robust UWT. We develop a novel UWT method dubbed Word Alignment using Language-Image Pretraining (WALIP), leveraging visual observations via the shared image-text embedding space of CLIPs (Radford et al., 2021). WALIP has a two-step procedure. First, we retrieve word pairs with high confidences of similarity, computed using our proposed image-based fingerprints, which define the initial pivot for the alignment. Second, we apply our robust Procrustes algorithm to estimate the linear mapping between two embedding spaces, which iteratively corrects and refines the estimated alignment. Our extensive experiments show that WALIP improves upon the state-of-the-art performance of bilingual word alignment for a few language pairs across different word embeddings and displays great robustness to the dissimilarity of language pairs or training corpora for two word embeddings.</abstract>
<identifier type="citekey">dinh-etal-2022-utilizing</identifier>
<identifier type="doi">10.18653/v1/2022.findings-emnlp.12</identifier>
<location>
<url>https://aclanthology.org/2022.findings-emnlp.12</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>154</start>
<end>168</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Utilizing Language-Image Pretraining for Efficient and Robust Bilingual Word Alignment
%A Dinh, Tuan
%A Sohn, Jy-yong
%A Rajput, Shashank
%A Ossowski, Timothy
%A Ming, Yifei
%A Hu, Junjie
%A Papailiopoulos, Dimitris
%A Lee, Kangwook
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Findings of the Association for Computational Linguistics: EMNLP 2022
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F dinh-etal-2022-utilizing
%X Word translation without parallel corpora has become feasible, rivaling the performance of supervised methods. Recent findings have shown the improvement in accuracy and robustness of unsupervised word translation (UWT) by utilizing visual observations, which are universal representations across languages. Our work investigates the potential of using not only visual observations but also pretrained language-image models for enabling a more efficient and robust UWT. We develop a novel UWT method dubbed Word Alignment using Language-Image Pretraining (WALIP), leveraging visual observations via the shared image-text embedding space of CLIPs (Radford et al., 2021). WALIP has a two-step procedure. First, we retrieve word pairs with high confidences of similarity, computed using our proposed image-based fingerprints, which define the initial pivot for the alignment. Second, we apply our robust Procrustes algorithm to estimate the linear mapping between two embedding spaces, which iteratively corrects and refines the estimated alignment. Our extensive experiments show that WALIP improves upon the state-of-the-art performance of bilingual word alignment for a few language pairs across different word embeddings and displays great robustness to the dissimilarity of language pairs or training corpora for two word embeddings.
%R 10.18653/v1/2022.findings-emnlp.12
%U https://aclanthology.org/2022.findings-emnlp.12
%U https://doi.org/10.18653/v1/2022.findings-emnlp.12
%P 154-168
Markdown (Informal)
[Utilizing Language-Image Pretraining for Efficient and Robust Bilingual Word Alignment](https://aclanthology.org/2022.findings-emnlp.12) (Dinh et al., Findings 2022)
ACL
- Tuan Dinh, Jy-yong Sohn, Shashank Rajput, Timothy Ossowski, Yifei Ming, Junjie Hu, Dimitris Papailiopoulos, and Kangwook Lee. 2022. Utilizing Language-Image Pretraining for Efficient and Robust Bilingual Word Alignment. In Findings of the Association for Computational Linguistics: EMNLP 2022, pages 154–168, Abu Dhabi, United Arab Emirates. Association for Computational Linguistics.