@inproceedings{zeng-bhat-2023-unified,
    title = "Unified Representation for Non-compositional and Compositional Expressions",
    author = "Zeng, Ziheng and
      Bhat, Suma",
    editor = "Bouamor, Houda and
      Pino, Juan and
      Bali, Kalika",
    booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
    month = dec,
    year = "2023",
    address = "Singapore",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2023.findings-emnlp.783",
    doi = "10.18653/v1/2023.findings-emnlp.783",
    pages = "11696--11710",
    abstract = "Accurate processing of non-compositional language relies on generating good representations for such expressions. In this work, we study the representation of language non-compositionality by proposing a language model, PIER+, that builds on BART and can create semantically meaningful and contextually appropriate representations for English potentially idiomatic expressions (PIEs). PIEs are characterized by their non-compositionality and contextual ambiguity in their literal and idiomatic interpretations. Via intrinsic evaluation on embedding quality and extrinsic evaluation on PIE processing and NLU tasks, we show that representations generated by PIER+ result in a 33{\%} higher homogeneity score for embedding clustering than BART, as well as gains of 3.12{\%} and 3.29{\%} in accuracy and sequence accuracy for PIE sense classification and span detection, respectively, compared to the state-of-the-art IE representation model, GIEA. These gains are achieved without sacrificing PIER+{'}s performance on NLU tasks (+/- 1{\%} accuracy) compared to BART.",
}
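
For reuse in scripts, a minimal sketch of loading the BibTeX record above with the bibtexparser library (v1 API); the file name is an assumption for illustration.

# Sketch: parse the BibTeX record above with bibtexparser (v1 API).
# The file name "zeng-bhat-2023-unified.bib" is illustrative.
import bibtexparser

with open("zeng-bhat-2023-unified.bib") as f:
    db = bibtexparser.load(f)

entry = db.entries[0]           # one entry: the record above
print(entry["ID"])              # zeng-bhat-2023-unified
print(entry["title"])           # Unified Representation for ...
print(entry["doi"])             # 10.18653/v1/2023.findings-emnlp.783
print(entry["pages"])           # 11696--11710
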
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="zeng-bhat-2023-unified">
    <titleInfo>
      <title>Unified Representation for Non-compositional and Compositional Expressions</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Ziheng</namePart>
      <namePart type="family">Zeng</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Suma</namePart>
      <namePart type="family">Bhat</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Houda</namePart>
        <namePart type="family">Bouamor</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Juan</namePart>
        <namePart type="family">Pino</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kalika</namePart>
        <namePart type="family">Bali</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Singapore</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Accurate processing of non-compositional language relies on generating good representations for such expressions. In this work, we study the representation of language non-compositionality by proposing a language model, PIER+, that builds on BART and can create semantically meaningful and contextually appropriate representations for English potentially idiomatic expressions (PIEs). PIEs are characterized by their non-compositionality and contextual ambiguity in their literal and idiomatic interpretations. Via intrinsic evaluation on embedding quality and extrinsic evaluation on PIE processing and NLU tasks, we show that representations generated by PIER+ result in a 33% higher homogeneity score for embedding clustering than BART, as well as gains of 3.12% and 3.29% in accuracy and sequence accuracy for PIE sense classification and span detection, respectively, compared to the state-of-the-art IE representation model, GIEA. These gains are achieved without sacrificing PIER+’s performance on NLU tasks (+/- 1% accuracy) compared to BART.</abstract>
    <identifier type="citekey">zeng-bhat-2023-unified</identifier>
    <identifier type="doi">10.18653/v1/2023.findings-emnlp.783</identifier>
    <location>
      <url>https://aclanthology.org/2023.findings-emnlp.783</url>
    </location>
    <part>
      <date>2023-12</date>
      <extent unit="page">
        <start>11696</start>
        <end>11710</end>
      </extent>
    </part>
  </mods>
</modsCollection>
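
Likewise, a short sketch of extracting fields from the MODS record above with Python's standard library; only the mods/v3 namespace declared in the record is assumed, and the file name is illustrative.

# Sketch: read title, DOI, and authors from the MODS XML above.
import xml.etree.ElementTree as ET

NS = {"m": "http://www.loc.gov/mods/v3"}  # namespace from the record
tree = ET.parse("zeng-bhat-2023-unified.xml")  # illustrative file name

mods = tree.getroot().find("m:mods", NS)
title = mods.find("m:titleInfo/m:title", NS).text
doi = mods.find("m:identifier[@type='doi']", NS).text
# Direct-child <name> elements are the authors; editors sit under relatedItem.
authors = [
    " ".join(p.text for p in name.findall("m:namePart", NS))
    for name in mods.findall("m:name", NS)
]
print(title)    # Unified Representation for ...
print(doi)      # 10.18653/v1/2023.findings-emnlp.783
print(authors)  # ['Ziheng Zeng', 'Suma Bhat']
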
%0 Conference Proceedings
%T Unified Representation for Non-compositional and Compositional Expressions
%A Zeng, Ziheng
%A Bhat, Suma
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F zeng-bhat-2023-unified
%X Accurate processing of non-compositional language relies on generating good representations for such expressions. In this work, we study the representation of language non-compositionality by proposing a language model, PIER+, that builds on BART and can create semantically meaningful and contextually appropriate representations for English potentially idiomatic expressions (PIEs). PIEs are characterized by their non-compositionality and contextual ambiguity in their literal and idiomatic interpretations. Via intrinsic evaluation on embedding quality and extrinsic evaluation on PIE processing and NLU tasks, we show that representations generated by PIER+ result in a 33% higher homogeneity score for embedding clustering than BART, as well as gains of 3.12% and 3.29% in accuracy and sequence accuracy for PIE sense classification and span detection, respectively, compared to the state-of-the-art IE representation model, GIEA. These gains are achieved without sacrificing PIER+’s performance on NLU tasks (+/- 1% accuracy) compared to BART.
%R 10.18653/v1/2023.findings-emnlp.783
%U https://aclanthology.org/2023.findings-emnlp.783
%U https://doi.org/10.18653/v1/2023.findings-emnlp.783
%P 11696-11710
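
The abstract's headline intrinsic result is a 33% higher homogeneity score for embedding clustering relative to BART. For readers unfamiliar with the metric, the hedged sketch below shows the general shape of such an evaluation: embed contexts of a potentially idiomatic expression, cluster them, and score cluster purity against gold sense labels. This is not the authors' pipeline; the checkpoint, mean pooling, KMeans settings, and sentences are all illustrative assumptions.

# Sketch: homogeneity-score evaluation of PIE context embeddings.
# NOT the paper's code; model choice and pooling are assumptions.
import torch
from sklearn.cluster import KMeans
from sklearn.metrics import homogeneity_score
from transformers import BartModel, BartTokenizer

tok = BartTokenizer.from_pretrained("facebook/bart-base")
model = BartModel.from_pretrained("facebook/bart-base").eval()

# Toy contexts for the PIE "break the ice": idiomatic (1) vs. literal (0).
sents = [
    "Her joke helped break the ice at the meeting.",
    "A quick game helped break the ice with the new hires.",
    "We used a pick to break the ice on the pond.",
    "The ship's hull is reinforced to break the ice.",
]
gold = [1, 1, 0, 0]

with torch.no_grad():
    embs = []
    for s in sents:
        enc = tok(s, return_tensors="pt")
        # Mean-pool the encoder's token states into one sentence vector.
        hidden = model.encoder(**enc).last_hidden_state
        embs.append(hidden.mean(dim=1).squeeze(0).numpy())

# Cluster the sentence vectors and score purity against the gold senses:
# 1.0 means each cluster contains contexts of only one sense.
pred = KMeans(n_clusters=2, n_init=10, random_state=0).fit_predict(embs)
print(f"homogeneity: {homogeneity_score(gold, pred):.3f}")
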
Markdown (Informal)
[Unified Representation for Non-compositional and Compositional Expressions](https://aclanthology.org/2023.findings-emnlp.783) (Zeng & Bhat, Findings 2023)
ACL
Ziheng Zeng and Suma Bhat. 2023. Unified Representation for Non-compositional and Compositional Expressions. In Findings of the Association for Computational Linguistics: EMNLP 2023, pages 11696–11710, Singapore. Association for Computational Linguistics.