% Workshop paper in the MWE 2017 proceedings (pp. 21--30); DOI 10.18653/v1/W17-1703.
@inproceedings{garcia-etal-2017-using,
    author    = {Garcia, Marcos and Garc{\'\i}a-Salido, Marcos and Alonso-Ramos, Margarita},
    title     = {Using bilingual word-embeddings for multilingual collocation extraction},
    editor    = {Markantonatou, Stella and Ramisch, Carlos and Savary, Agata and Vincze, Veronika},
    booktitle = {Proceedings of the 13th Workshop on Multiword Expressions ({MWE} 2017)},
    year      = {2017},
    month     = apr,
    address   = {Valencia, Spain},
    publisher = {Association for Computational Linguistics},
    pages     = {21--30},
    doi       = {10.18653/v1/W17-1703},
    url       = {https://aclanthology.org/W17-1703},
    abstract  = {This paper presents a new strategy for multilingual collocation extraction which takes advantage of parallel corpora to learn bilingual word-embeddings. Monolingual collocation candidates are retrieved using Universal Dependencies, while the distributional models are then applied to search for equivalents of the elements of each collocation in the target languages. The proposed method extracts not only collocation equivalents with direct translation between languages, but also other cases where the collocations in the two languages are not literal translations of each other. Several experiments -evaluating collocations with three syntactic patterns- in English, Spanish, and Portuguese show that our approach can effectively extract large pairs of bilingual equivalents with an average precision of about 90{\%}. Moreover, preliminary results on comparable corpora suggest that the distributional models can be applied for identifying new bilingual collocations in different domains.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, keyed by the citekey garcia-etal-2017-using. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="garcia-etal-2017-using">
    <!-- The paper itself: title, then one name element per author. -->
    <titleInfo>
      <title>Using bilingual word-embeddings for multilingual collocation extraction</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Marcos</namePart>
      <namePart type="family">Garcia</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Marcos</namePart>
      <namePart type="family">García-Salido</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Margarita</namePart>
      <namePart type="family">Alonso-Ramos</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2017-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 13th Workshop on Multiword Expressions (MWE 2017)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Stella</namePart>
        <namePart type="family">Markantonatou</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Carlos</namePart>
        <namePart type="family">Ramisch</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Agata</namePart>
        <namePart type="family">Savary</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Veronika</namePart>
        <namePart type="family">Vincze</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Valencia, Spain</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper presents a new strategy for multilingual collocation extraction which takes advantage of parallel corpora to learn bilingual word-embeddings. Monolingual collocation candidates are retrieved using Universal Dependencies, while the distributional models are then applied to search for equivalents of the elements of each collocation in the target languages. The proposed method extracts not only collocation equivalents with direct translation between languages, but also other cases where the collocations in the two languages are not literal translations of each other. Several experiments -evaluating collocations with three syntactic patterns- in English, Spanish, and Portuguese show that our approach can effectively extract large pairs of bilingual equivalents with an average precision of about 90%. Moreover, preliminary results on comparable corpora suggest that the distributional models can be applied for identifying new bilingual collocations in different domains.</abstract>
    <!-- Identifiers and locators for the paper. -->
    <identifier type="citekey">garcia-etal-2017-using</identifier>
    <identifier type="doi">10.18653/v1/W17-1703</identifier>
    <location>
      <url>https://aclanthology.org/W17-1703</url>
    </location>
    <!-- Position within the host volume: issue date and page extent. -->
    <part>
      <date>2017-04</date>
      <extent unit="page">
        <start>21</start>
        <end>30</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Using bilingual word-embeddings for multilingual collocation extraction
%A Garcia, Marcos
%A García-Salido, Marcos
%A Alonso-Ramos, Margarita
%Y Markantonatou, Stella
%Y Ramisch, Carlos
%Y Savary, Agata
%Y Vincze, Veronika
%S Proceedings of the 13th Workshop on Multiword Expressions (MWE 2017)
%D 2017
%8 April
%I Association for Computational Linguistics
%C Valencia, Spain
%F garcia-etal-2017-using
%X This paper presents a new strategy for multilingual collocation extraction which takes advantage of parallel corpora to learn bilingual word-embeddings. Monolingual collocation candidates are retrieved using Universal Dependencies, while the distributional models are then applied to search for equivalents of the elements of each collocation in the target languages. The proposed method extracts not only collocation equivalents with direct translation between languages, but also other cases where the collocations in the two languages are not literal translations of each other. Several experiments -evaluating collocations with three syntactic patterns- in English, Spanish, and Portuguese show that our approach can effectively extract large pairs of bilingual equivalents with an average precision of about 90%. Moreover, preliminary results on comparable corpora suggest that the distributional models can be applied for identifying new bilingual collocations in different domains.
%R 10.18653/v1/W17-1703
%U https://aclanthology.org/W17-1703
%U https://doi.org/10.18653/v1/W17-1703
%P 21-30
Markdown (Informal)
[Using bilingual word-embeddings for multilingual collocation extraction](https://aclanthology.org/W17-1703) (Garcia et al., MWE 2017)
ACL