@inproceedings{ki-etal-2024-inspecting,
title = "Inspecting Soundness of {AMR} Similarity Metrics in terms of Equivalence and Inequivalence",
author = "Ki, Kyung Seo and
Kim, Bugeun and
Gweon, Gahgene",
editor = "Bollegala, Danushka and
Shwartz, Vered",
booktitle = "Proceedings of the 13th Joint Conference on Lexical and Computational Semantics (*SEM 2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.starsem-1.32/",
doi = "10.18653/v1/2024.starsem-1.32",
pages = "402--409",
abstract = "In this study, we investigate soundness of current Abstract Meaning Representation (AMR) similarity metrics in terms of equivalence and inequivalence. Specifically, AMR guidelines provide several equivalence and inequivalence conditions to reflect the meaning aspect of the semantics. Thus, it is important to examine an AMR metric's soundness, i.e., whether the metric correctly reflects the guidelines. However, the existing metrics have less investigated their soundness. In this work, we propose a new experimental method using simulated data and a series of statistical tests to verify the metric's soundness. Our experimental result revealed that all existing metrics such as Smatch, SemBLEU, S2match, Smatch++, WWLK-theta, WWLK-k3e2n, and SEMA did not fully meet the AMR guidelines in terms of equivalence and inequivalence aspects. Also, to alleviate this soundness problem, we suggest a revised metric called Smatch{\#}, which adopts simple graph standardization technique that can improve the soundness of an existing metric."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ki-etal-2024-inspecting">
<titleInfo>
<title>Inspecting Soundness of AMR Similarity Metrics in terms of Equivalence and Inequivalence</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kyung</namePart>
<namePart type="given">Seo</namePart>
<namePart type="family">Ki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bugeun</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gahgene</namePart>
<namePart type="family">Gweon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 13th Joint Conference on Lexical and Computational Semantics (*SEM 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Danushka</namePart>
<namePart type="family">Bollegala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vered</namePart>
<namePart type="family">Shwartz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this study, we investigate soundness of current Abstract Meaning Representation (AMR) similarity metrics in terms of equivalence and inequivalence. Specifically, AMR guidelines provide several equivalence and inequivalence conditions to reflect the meaning aspect of the semantics. Thus, it is important to examine an AMR metric's soundness, i.e., whether the metric correctly reflects the guidelines. However, the existing metrics have less investigated their soundness. In this work, we propose a new experimental method using simulated data and a series of statistical tests to verify the metric's soundness. Our experimental result revealed that all existing metrics such as Smatch, SemBLEU, S2match, Smatch++, WWLK-theta, WWLK-k3e2n, and SEMA did not fully meet the AMR guidelines in terms of equivalence and inequivalence aspects. Also, to alleviate this soundness problem, we suggest a revised metric called Smatch#, which adopts simple graph standardization technique that can improve the soundness of an existing metric.</abstract>
<identifier type="citekey">ki-etal-2024-inspecting</identifier>
<identifier type="doi">10.18653/v1/2024.starsem-1.32</identifier>
<location>
<url>https://aclanthology.org/2024.starsem-1.32/</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>402</start>
<end>409</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Inspecting Soundness of AMR Similarity Metrics in terms of Equivalence and Inequivalence
%A Ki, Kyung Seo
%A Kim, Bugeun
%A Gweon, Gahgene
%Y Bollegala, Danushka
%Y Shwartz, Vered
%S Proceedings of the 13th Joint Conference on Lexical and Computational Semantics (*SEM 2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F ki-etal-2024-inspecting
%X In this study, we investigate soundness of current Abstract Meaning Representation (AMR) similarity metrics in terms of equivalence and inequivalence. Specifically, AMR guidelines provide several equivalence and inequivalence conditions to reflect the meaning aspect of the semantics. Thus, it is important to examine an AMR metric's soundness, i.e., whether the metric correctly reflects the guidelines. However, the existing metrics have less investigated their soundness. In this work, we propose a new experimental method using simulated data and a series of statistical tests to verify the metric's soundness. Our experimental result revealed that all existing metrics such as Smatch, SemBLEU, S2match, Smatch++, WWLK-theta, WWLK-k3e2n, and SEMA did not fully meet the AMR guidelines in terms of equivalence and inequivalence aspects. Also, to alleviate this soundness problem, we suggest a revised metric called Smatch#, which adopts simple graph standardization technique that can improve the soundness of an existing metric.
%R 10.18653/v1/2024.starsem-1.32
%U https://aclanthology.org/2024.starsem-1.32/
%U https://doi.org/10.18653/v1/2024.starsem-1.32
%P 402-409
Markdown (Informal)
[Inspecting Soundness of AMR Similarity Metrics in terms of Equivalence and Inequivalence](https://aclanthology.org/2024.starsem-1.32/) (Ki et al., *SEM 2024)
ACL