@inproceedings{estve-etal-2024-vector,
title = "Vector Spaces for Quantifying Disparity of Multiword Expressions in Annotated Text",
author = "Est{\`e}ve, Louis and
Savary, Agata and
Lavergne, Thomas",
editor = "Fu, Xiyan and
Fleisig, Eve",
booktitle = "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.acl-srw.20",
doi = "10.18653/v1/2024.acl-srw.20",
pages = "110--130",
abstract = "Multiword Expressions (MWEs) make a good case study for linguistic diversity due to their idiosyncratic nature. Defining MWE canonical forms as types, diversity may be measured notably through disparity, based on pairwise distances between types. To this aim, we train static MWE-aware word embeddings for verbal MWEs in 14 languages, and we show interesting properties of these vector spaces. We use these vector spaces to implement the so-called functional diversity measure. We apply this measure to the results of several MWE identification systems. We find that, although MWE vector spaces are meaningful at a local scale, the disparity measure aggregating them at a global scale strongly correlates with the number of types, which questions its usefulness in presence of simpler diversity metrics such as variety. We make the vector spaces we generated available.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="estve-etal-2024-vector">
<titleInfo>
<title>Vector Spaces for Quantifying Disparity of Multiword Expressions in Annotated Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">Louis</namePart>
<namePart type="family">Estève</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Agata</namePart>
<namePart type="family">Savary</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thomas</namePart>
<namePart type="family">Lavergne</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xiyan</namePart>
<namePart type="family">Fu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eve</namePart>
<namePart type="family">Fleisig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Multiword Expressions (MWEs) make a good case study for linguistic diversity due to their idiosyncratic nature. Defining MWE canonical forms as types, diversity may be measured notably through disparity, based on pairwise distances between types. To this aim, we train static MWE-aware word embeddings for verbal MWEs in 14 languages, and we show interesting properties of these vector spaces. We use these vector spaces to implement the so-called functional diversity measure. We apply this measure to the results of several MWE identification systems. We find that, although MWE vector spaces are meaningful at a local scale, the disparity measure aggregating them at a global scale strongly correlates with the number of types, which questions its usefulness in presence of simpler diversity metrics such as variety. We make the vector spaces we generated available.</abstract>
<identifier type="citekey">estve-etal-2024-vector</identifier>
<identifier type="doi">10.18653/v1/2024.acl-srw.20</identifier>
<location>
<url>https://aclanthology.org/2024.acl-srw.20</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>110</start>
<end>130</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Vector Spaces for Quantifying Disparity of Multiword Expressions in Annotated Text
%A Estève, Louis
%A Savary, Agata
%A Lavergne, Thomas
%Y Fu, Xiyan
%Y Fleisig, Eve
%S Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F estve-etal-2024-vector
%X Multiword Expressions (MWEs) make a good case study for linguistic diversity due to their idiosyncratic nature. Defining MWE canonical forms as types, diversity may be measured notably through disparity, based on pairwise distances between types. To this aim, we train static MWE-aware word embeddings for verbal MWEs in 14 languages, and we show interesting properties of these vector spaces. We use these vector spaces to implement the so-called functional diversity measure. We apply this measure to the results of several MWE identification systems. We find that, although MWE vector spaces are meaningful at a local scale, the disparity measure aggregating them at a global scale strongly correlates with the number of types, which questions its usefulness in presence of simpler diversity metrics such as variety. We make the vector spaces we generated available.
%R 10.18653/v1/2024.acl-srw.20
%U https://aclanthology.org/2024.acl-srw.20
%U https://doi.org/10.18653/v1/2024.acl-srw.20
%P 110-130
Markdown (Informal)
[Vector Spaces for Quantifying Disparity of Multiword Expressions in Annotated Text](https://aclanthology.org/2024.acl-srw.20) (Estève et al., ACL 2024)
ACL